+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2003, International Business Machines Corporation and
+ * Copyright (c) 1997-2016, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
-/********************************************************************************
+/*******************************************************************************
*
-* File CCONVTST.C
+* File nucnvtst.c
*
* Modification History:
* Name Description
* Steven R. Loomis 7/8/1999 Adding input buffer test
-*********************************************************************************
+********************************************************************************
*/
#include <stdio.h>
#include "cstring.h"
#include "unicode/uloc.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
+#include "unicode/ucnv_cb.h"
#include "cintltst.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/ucol.h"
+#include "unicode/utf16.h"
#include "cmemory.h"
+#include "nucnvtst.h"
/*
 * Forward declarations for the static helpers and test functions of this
 * file. Several groups are declared only when UCONFIG_NO_LEGACY_CONVERSION
 * is not set; individual entries are further guarded by UCONFIG_NO_COLLATION,
 * UCONFIG_NO_FILE_IO, U_ENABLE_GENERIC_ISO_2022 or disabled with #if 0.
 */
-static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message);
+static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
#if !UCONFIG_NO_COLLATION
static void TestJitterbug981(void);
#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestJitterbug1293(void);
+#endif
static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
static void TestConverterTypesAndStarters(void);
static void TestAmbiguous(void);
static void TestUTF32BE(void);
static void TestUTF32LE(void);
static void TestLATIN1(void);
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestSBCS(void);
static void TestDBCS(void);
static void TestMBCS(void);
/*
 * NOTE(review): this guard sits inside the !UCONFIG_NO_LEGACY_CONVERSION
 * block opened above, so only the !UCONFIG_NO_FILE_IO half adds a condition.
 */
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
+static void TestICCRunout(void);
+#endif
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
static void TestISO_2022(void);
+#endif
+
static void TestISO_2022_JP(void);
static void TestISO_2022_JP_1(void);
static void TestISO_2022_JP_2(void);
static void TestISO_2022_KR(void);
static void TestISO_2022_KR_1(void);
static void TestISO_2022_CN(void);
+#if 0
+ /*
+ * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
+ */
static void TestISO_2022_CN_EXT(void);
+#endif
static void TestJIS(void);
static void TestHZ(void);
+#endif
+
static void TestSCSU(void);
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestEBCDIC_STATEFUL(void);
static void TestGB18030(void);
static void TestLMBCS(void);
static void TestJitterbug255(void);
static void TestEBCDICUS4XML(void);
+#if 0
+ /*
+ * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
+ */
static void TestJitterbug915(void);
+#endif
static void TestISCII(void);
+
+static void TestCoverageMBCS(void);
+static void TestJitterbug2346(void);
+static void TestJitterbug2411(void);
+static void TestJB5275(void);
+static void TestJB5275_1(void);
+static void TestJitterbug6175(void);
+
+static void TestIsFixedWidth(void);
+#endif
+
+static void TestInBufSizes(void);
+
+static void TestRoundTrippingAllUTF(void);
static void TestConv(const uint16_t in[],
int len,
const char* conv,
const char* lang,
char byteArr[],
int byteArrLen);
/*
 * The declarations removed below are re-added above (inside the conditional
 * groups), except addTestNewConvert(), whose prototype is dropped from this
 * file by the change.
 */
-static void TestRoundTrippingAllUTF(void);
-static void TestCoverageMBCS(void);
-static void TestJitterbug2346(void);
-static void TestJitterbug2411(void);
-void addTestNewConvert(TestNode** root);
/*
 * Open a converter by name, using test data if the name begins with '@'.
 *
 * cnv: converter name; when non-NULL and starting with '@', the rest of the
 *      name (cnv+1) is opened with ucnv_openPackage(), otherwise cnv is
 *      passed unchanged to ucnv_open().
 * err: forwarded to whichever open function is called.
 * Returns the UConverter pointer returned by that open function.
 */
static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
{
if(cnv && cnv[0] == '@') {
/* the package argument changes from the literal "testdata" to loadTestData(err) */
- return ucnv_openPackage("testdata", cnv+1, err);
+ return ucnv_openPackage(loadTestData(err), cnv+1, err);
} else {
return ucnv_open(cnv, err);
}
}
static void
-TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message)
+TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
{
const char* s0;
const char* s=(char*)source;
- const uint32_t *r=results;
+ const int32_t *r=results;
UErrorCode errorCode=U_ZERO_ERROR;
- uint32_t c;
+ UChar32 c;
while(s<limit) {
s0=s;
} else if(U_FAILURE(errorCode)) {
log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
break;
- } else if((uint32_t)(s-s0)!=*r || c!=*(r+1)) {
+ } else if(
+ /* test the expected number of input bytes only if >=0 */
+ (*r>=0 && (int32_t)(s-s0)!=*r) ||
+ c!=*(r+1)
+ ) {
log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
message, c, (s-s0), *(r+1), *r);
break;
/*
 * Registers this file's test functions on the test tree rooted at *root,
 * each under a "tsconv/nucnvtst/<name>" path.
 *
 * Registration of individual groups depends on UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022, mirroring the guards on the declarations.
 */
void addTestNewConvert(TestNode** root)
{
+#if !UCONFIG_NO_FILE_IO
addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
+#endif
addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
+
+ /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
+
/* TestLMBCS is registered here now; its former registration after TestGB18030 is removed further down. */
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
+#endif
+
addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
+#if !UCONFIG_NO_FILE_IO
addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
+ addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
+#endif
addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
+#endif
+
addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
/* The comment that follows also encloses the TestISO_2022_CN_EXT and TestJitterbug915 registrations, so neither is registered. */
+ /*
+ * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
+ */
addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
+#endif
+
addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
- addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
+ addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
+ addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
#if !UCONFIG_NO_COLLATION
addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
#endif
+
addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
+#endif
+
+
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
+#endif
+
addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
+ addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
+ addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
+#endif
}
/*
 * Formats a description of the current test into the global gNuConvTestName
 * from the given codepage and direction strings plus the global
 * gInBufferSize and gOutBufferSize values.
 *
 * NOTE(review): sprintf() is unbounded and the capacity of gNuConvTestName
 * is not visible here -- confirm it can hold the longest codepage/direction
 * combination, or switch to a bounded formatter.
 */
static void setNuConvTestName(const char *codepage, const char *direction)
{
/* the change casts both buffer sizes to int so the arguments match the %d conversions */
- sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
- codepage,
- direction,
- gInBufferSize,
- gOutBufferSize);
+ sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
+ codepage,
+ direction,
+ (int)gInBufferSize,
+ (int)gOutBufferSize);
}
typedef enum
{
UErrorCode status = U_ZERO_ERROR;
UConverter *conv = 0;
- uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */
+ char junkout[NEW_MAX_BUFFER]; /* FIX */
int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
- uint8_t *p;
+ char *p;
const UChar *src;
- uint8_t *end;
- uint8_t *targ;
+ char *end;
+ char *targ;
int32_t *offs;
int i;
int32_t realBufferSize;
- uint8_t *realBufferEnd;
+ char *realBufferEnd;
const UChar *realSourceEnd;
const UChar *sourceLimit;
UBool checkOffsets = TRUE;
UBool doFlush;
for(i=0;i<NEW_MAX_BUFFER;i++)
- junkout[i] = 0xF0;
+ junkout[i] = (char)0xF0;
for(i=0;i<NEW_MAX_BUFFER;i++)
junokout[i] = 0xFF;
targ = junkout;
offs = junokout;
- realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
+ realBufferSize = UPRV_LENGTHOF(junkout);
realBufferEnd = junkout + realBufferSize;
realSourceEnd = source + sourceLen;
status = U_ZERO_ERROR;
ucnv_fromUnicode (conv,
- (char **)&targ,
- (const char*)end,
+ &targ,
+ end,
&src,
sourceLimit,
checkOffsets ? offs : NULL,
log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
sourceLen, targ-junkout);
- if(VERBOSITY)
+ if(getTestOption(VERBOSITY_OPTION))
{
char junk[9999];
char offset_str[9999];
- uint8_t *ptr;
+ char *ptr;
junk[0] = 0;
offset_str[0] = 0;
if(expectLen != targ-junkout) {
log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
- printf("\nGot:");
- printSeqErr((const unsigned char*)junkout, targ-junkout);
- printf("\nExpected:");
+ fprintf(stderr, "Got:\n");
+ printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
+ fprintf(stderr, "Expected:\n");
printSeqErr((const unsigned char*)expect, expectLen);
return TC_MISMATCH;
}
log_verbose("comparing %d offsets..\n", targ-junkout);
if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
log_err("did not get the expected offsets. %s\n", gNuConvTestName);
- printSeqErr((const unsigned char*)junkout, targ-junkout);
+ printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
log_err("\n");
log_err("Got : ");
for(p=junkout;p<targ;p++) {
} else {
log_err("String does not match u->%s\n", gNuConvTestName);
printUSeqErr(source, sourceLen);
- printf("\nGot:");
+ fprintf(stderr, "Got:\n");
printSeqErr((const unsigned char *)junkout, expectLen);
- printf("\nExpected:");
+ fprintf(stderr, "Expected:\n");
printSeqErr((const unsigned char *)expect, expectLen);
return TC_MISMATCH;
UConverter *conv = 0;
UChar junkout[NEW_MAX_BUFFER]; /* FIX */
int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
- const uint8_t *src;
- const uint8_t *realSourceEnd;
- const uint8_t *srcLimit;
+ const char *src;
+ const char *realSourceEnd;
+ const char *srcLimit;
UChar *p;
UChar *targ;
UChar *end;
}
log_verbose("Converter opened..\n");
- src = source;
+ src = (const char *)source;
targ = junkout;
offs = junokout;
- realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
+ realBufferSize = UPRV_LENGTHOF(junkout);
realBufferEnd = junkout + realBufferSize;
realSourceEnd = src + sourcelen;
/* src and srcLimit are declared as const char * by this change, so the casts on the source arguments are dropped */
ucnv_toUnicode (conv,
&targ,
end,
- (const char **)&src,
- (const char *)srcLimit,
+ &src,
+ srcLimit,
checkOffsets ? offs : NULL,
(UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
&status);
log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
sourcelen, targ-junkout);
- if(VERBOSITY)
+ if(getTestOption(VERBOSITY_OPTION))
{
char junk[9999];
char offset_str[9999];
}
log_err("\n");
log_err("input: ");
- for(i=0; i<(src-source); i++) {
+ for(i=0; i<(src-(const char *)source); i++) {
log_err("%X,", (unsigned char)source[i]);
}
log_err("\n");
{
/** test chars #1 */
/* 1 2 3 1Han 2Han 3Han . */
- UChar sampleText[] =
- { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
+ static const UChar sampleText[] =
+ { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
+ static const UChar sampleTextRoundTripUnmappable[] =
+ { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
- const uint8_t expectedUTF8[] =
- { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
- int32_t toUTF8Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
- int32_t fmUTF8Offs[] =
- { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
+ static const uint8_t expectedUTF8[] =
+ { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
+ static const int32_t toUTF8Offs[] =
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
+ static const int32_t fmUTF8Offs[] =
+ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
+#ifdef U_ENABLE_GENERIC_ISO_2022
/*
 * Expected bytes and offset tables for the generic ISO-2022 converter;
 * compiled only when U_ENABLE_GENERIC_ISO_2022 is defined.
 *
 * NOTE(review): these tables were not extended for the surrogate pair this
 * change appends to sampleText (17 bytes / 8 from-offsets here, versus
 * 18 bytes / 10 from-offsets in the UTF-8 tables), while the ISO_2022 calls
 * pass the full length of sampleText -- confirm before enabling.
 */
/* Same as UTF8, but with ^[%B preceding */
- const uint8_t expectedISO2022[] =
+ static const uint8_t expectedISO2022[] =
{ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
- int32_t toISO2022Offs[] =
+ static const int32_t toISO2022Offs[] =
{ -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
- int32_t fmISO2022Offs[] =
+ static const int32_t fmISO2022Offs[] =
{ 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
+#endif
/* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
- const uint8_t expectedIBM930[] =
- { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
- int32_t toIBM930Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
- int32_t fmIBM930Offs[] =
- { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
+ static const uint8_t expectedIBM930[] =
+ { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
+ static const int32_t toIBM930Offs[] =
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
+ static const int32_t fmIBM930Offs[] =
+ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
/* 1 2 3 0 h1 h2 h3 . MBCS*/
- const uint8_t expectedIBM943[] =
- { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
- int32_t toIBM943Offs [] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
- int32_t fmIBM943Offs[] =
- { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
+ static const uint8_t expectedIBM943[] =
+ { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
+ static const int32_t toIBM943Offs [] =
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
+ static const int32_t fmIBM943Offs[] =
+ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
/* 1 2 3 0 h1 h2 h3 . DBCS*/
- const uint8_t expectedIBM9027[] =
- { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
- int32_t toIBM9027Offs [] =
- { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
+ static const uint8_t expectedIBM9027[] =
+ { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
+ static const int32_t toIBM9027Offs [] =
+ { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
/* 1 2 3 0 <?> <?> <?> . SBCS*/
- const uint8_t expectedIBM920[] =
- { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
- int32_t toIBM920Offs [] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
+ static const uint8_t expectedIBM920[] =
+ { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
+ static const int32_t toIBM920Offs [] =
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
/* 1 2 3 0 <?> <?> <?> . SBCS*/
- const uint8_t expectedISO88593[] =
- { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
- int32_t toISO88593Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
+ static const uint8_t expectedISO88593[] =
+ { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
+ static const int32_t toISO88593Offs[] =
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
- /* 1 2 3 0 <?> <?> <?> . LATIN_1*/
- const uint8_t expectedLATIN1[] =
- { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
- int32_t toLATIN1Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
+ /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
+ static const uint8_t expectedLATIN1[] =
+ { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
+ static const int32_t toLATIN1Offs[] =
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
/* etc */
- const uint8_t expectedUTF16BE[] =
- { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
- int32_t toUTF16BEOffs[]=
- { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
- int32_t fmUTF16BEOffs[] =
- { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
-
- const uint8_t expectedUTF16LE[] =
- { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
- int32_t toUTF16LEOffs[]=
- { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
- int32_t fmUTF16LEOffs[] =
- { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
-
- const uint8_t expectedUTF32BE[] =
+ static const uint8_t expectedUTF16BE[] =
+ { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
+ static const int32_t toUTF16BEOffs[]=
+ { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
+ static const int32_t fmUTF16BEOffs[] =
+ { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
+
+ static const uint8_t expectedUTF16LE[] =
+ { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
+ static const int32_t toUTF16LEOffs[]=
+ { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
+ static const int32_t fmUTF16LEOffs[] =
+ { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
+
+ static const uint8_t expectedUTF32BE[] =
{ 0x00, 0x00, 0x00, 0x31,
0x00, 0x00, 0x00, 0x32,
0x00, 0x00, 0x00, 0x33,
0x00, 0x00, 0x4e, 0x00,
0x00, 0x00, 0x4e, 0x8c,
0x00, 0x00, 0x4e, 0x09,
- 0x00, 0x00, 0x00, 0x2e };
- int32_t toUTF32BEOffs[]=
+ 0x00, 0x00, 0x00, 0x2e,
+ 0x00, 0x02, 0x00, 0x21 };
+ static const int32_t toUTF32BEOffs[]=
{ 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01,
0x02, 0x02, 0x02, 0x02,
0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08 };
- int32_t fmUTF32BEOffs[] =
- { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
+ static const int32_t fmUTF32BEOffs[] =
+ { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
- const uint8_t expectedUTF32LE[] =
+ static const uint8_t expectedUTF32LE[] =
{ 0x31, 0x00, 0x00, 0x00,
0x32, 0x00, 0x00, 0x00,
0x33, 0x00, 0x00, 0x00,
0x00, 0x4e, 0x00, 0x00,
0x8c, 0x4e, 0x00, 0x00,
0x09, 0x4e, 0x00, 0x00,
- 0x2e, 0x00, 0x00, 0x00 };
- int32_t toUTF32LEOffs[]=
+ 0x2e, 0x00, 0x00, 0x00,
+ 0x21, 0x00, 0x02, 0x00 };
+ static const int32_t toUTF32LEOffs[]=
{ 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01,
0x02, 0x02, 0x02, 0x02,
0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08 };
- int32_t fmUTF32LEOffs[] =
- { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
+ static const int32_t fmUTF32LEOffs[] =
+ { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
/** Test chars #2 **/
/* Sahha [health], slashed h's */
- const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
- const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
+ static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
+ static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
/* LMBCS */
- const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
- const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
- int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
- int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
+ static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
+ static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
+ static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
+ static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
/*********************************** START OF CODE finally *************/
- gInBufferSize = insize;
- gOutBufferSize = outsize;
+ gInBufferSize = insize;
+ gOutBufferSize = outsize;
- log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
+ log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
-#if 1
/*UTF-8*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
log_verbose("Test surrogate behaviour for UTF8\n");
{
- const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
- const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
+ static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
+ static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
0xf0, 0x90, 0x90, 0x81,
0xef, 0xbf, 0xbd
};
- int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
- testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
+ static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
+ testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
}
+
+#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
/*ISO-2022*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
+#endif
+
/*UTF16 LE*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
/*UTF16 BE*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
/*UTF32 LE*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
/*UTF32 BE*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
+
/*LATIN_1*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
/*EBCDIC_STATEFUL*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
/*MBCS*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
/*DBCS*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
/*SBCS*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
/*SBCS*/
- testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
+ testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
+#endif
/****/
-#endif
-#if 1
/*UTF-8*/
testConvertToU(expectedUTF8, sizeof(expectedUTF8),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
+ sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
+#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
/*ISO-2022*/
testConvertToU(expectedISO2022, sizeof(expectedISO2022),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
+ sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
+#endif
+
/*UTF16 LE*/
testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
+ sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
/*UTF16 BE*/
testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
+ sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
/*UTF32 LE*/
testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
+ sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
/*UTF32 BE*/
testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
+ sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
/*EBCDIC_STATEFUL*/
- testConvertToU(expectedIBM930, sizeof(expectedIBM930),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
+ testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
+ UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
/*MBCS*/
- testConvertToU(expectedIBM943, sizeof(expectedIBM943),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
+ testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
+ UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
+#endif
/* Try it again to make sure it still works */
testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
+ sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
+#if !UCONFIG_NO_LEGACY_CONVERSION
testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
- malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
+ malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
- testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
+ testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
/*LMBCS*/
- testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
+ testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
- LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
+ LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
+#endif
/* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
{
Hi Mom -+Jjo--!
A+ImIDkQ.
+-
- +ZeVnLIqe
+ +ZeVnLIqe-
*/
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
0x2b, 0x2d,
- 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
+ 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
};
static const UChar unicode[] = {
/*
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
11, 12, 12, 12, 13, 13, 13, 13, 14,
15, 15,
- 16, 16, 16, 17, 17, 17, 18, 18, 18
+ 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
};
/* same but escaping set O (the exclamation mark) */
Hi Mom -+Jjo--+ACE-
A+ImIDkQ.
+-
- +ZeVnLIqe
+ +ZeVnLIqe-
*/
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
0x2b, 0x2d,
- 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
+ 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
};
static const int32_t toUnicodeOffsetsR[] = {
0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
11, 12, 12, 12, 13, 13, 13, 13, 14,
15, 15,
- 16, 16, 16, 17, 17, 17, 18, 18, 18
+ 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
};
- testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
+ testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
- testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
+ testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
- testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
+ testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
- testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
+ testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
}
/*
35, 36, 36, 36, 37, 37, 37, 37, 37
};
- testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
+ testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
- testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
+ testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
}
/* Test UTF-8 bad data handling*/
0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
0xdf, 0xbf, /* 7ff */
0xbf, /* truncated tail */
- 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
+ 0xf4, 0x90, 0x80, 0x80, /* 110000 */
0x02
};
static const uint16_t utf8Expected[]={
0x0061,
- 0xfffd,
+ 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0x0000,
0x0062,
- 0xfffd,
- 0xfffd,
+ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0xdbff, 0xdfff,
0x07ff,
0xfffd,
- 0xfffd,
+ 0xfffd, 0xfffd, 0xfffd, 0xfffd,
0x0002
};
static const int32_t utf8Offsets[]={
- 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
+ 0,
+ 1, 2, 3, 4,
+ 5,
+ 6,
+ 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16,
+ 17, 17,
+ 21,
+ 23,
+ 24, 25, 26, 27,
+ 28
};
testConvertToU(utf8, sizeof(utf8),
- utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
+ utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
}
0x00, 0x00, 0x01, 0x62,
0x00, 0x00, 0x02, 0x62
};
-
static const uint16_t utf32Expected[]={
0x0061,
0xfffd, /* 0x110000 out of range */
0x0162,
0x0262
};
-
static const int32_t utf32Offsets[]={
0, 4, 8, 8, 12, 16, 20, 24, 28
};
- testConvertToU(utf32, sizeof(utf32),
- utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
+ static const uint8_t utf32ExpectedBack[]={
+ 0x00, 0x00, 0x00, 0x61,
+ 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
+ 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
+ 0x00, 0x00, 0x00, 0x62,
+ 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
+ 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
+ 0x00, 0x00, 0x01, 0x62,
+ 0x00, 0x00, 0x02, 0x62
+ };
+ static const int32_t utf32OffsetsBack[]={
+ 0,0,0,0,
+ 1,1,1,1,
+ 2,2,2,2,
+ 4,4,4,4,
+ 5,5,5,5,
+ 6,6,6,6,
+ 7,7,7,7,
+ 8,8,8,8
+ };
+ testConvertToU(utf32, sizeof(utf32),
+ utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
+ testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
+ utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
}
/* Test UTF-32LE bad data handling*/
0x0162,
0x0262
};
-
static const int32_t utf32Offsets[]={
0, 4, 8, 8, 12, 16, 20, 24, 28
};
+ static const uint8_t utf32ExpectedBack[]={
+ 0x61, 0x00, 0x00, 0x00,
+ 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
+ 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
+ 0x62, 0x00, 0x00, 0x00,
+ 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
+ 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
+ 0x62, 0x01, 0x00, 0x00,
+ 0x62, 0x02, 0x00, 0x00
+ };
+ static const int32_t utf32OffsetsBack[]={
+ 0,0,0,0,
+ 1,1,1,1,
+ 2,2,2,2,
+ 4,4,4,4,
+ 5,5,5,5,
+ 6,6,6,6,
+ 7,7,7,7,
+ 8,8,8,8
+ };
testConvertToU(utf32, sizeof(utf32),
- utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
-
+ utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
+ testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
+ utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
}
}
const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
- const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
- const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd};
- int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4, 5};
-
/*from Unicode*/
- testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
+ testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
-
- /*to Unicode*/
- testConvertToU(test1input, sizeof(test1input),
- expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE);
-
}
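- /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
+ /* Some more tests to increase the code coverage in MBCS. Create a test converter from test3.ucm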
int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
/*from Unicode*/
- testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
+ testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
/*to Unicode*/
testConvertToU(test3input, sizeof(test3input),
- expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
+ expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
}
static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
/*from Unicode*/
- testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
+ testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
/*to Unicode*/
testConvertToU(test4input, sizeof(test4input),
- expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
+ expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
}
#if 0
static void TestConverterTypesAndStarters()
{
+#if !UCONFIG_NO_LEGACY_CONVERSION
UConverter* myConverter;
UErrorCode err = U_ZERO_ERROR;
UBool mystarters[256];
TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
TestConverterType("ibm-878", UCNV_SBCS);
+#endif
+
TestConverterType("iso-8859-1", UCNV_LATIN_1);
+
TestConverterType("ibm-1208", UCNV_UTF8);
+
TestConverterType("utf-8", UCNV_UTF8);
TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
+
+#if defined(U_ENABLE_GENERIC_ISO_2022)
TestConverterType("iso-2022", UCNV_ISO_2022);
+#endif
+
TestConverterType("hz", UCNV_HZ);
+#endif
+
TestConverterType("scsu", UCNV_SCSU);
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
TestConverterType("x-iscii-de", UCNV_ISCII);
+#endif
+
TestConverterType("ascii", UCNV_US_ASCII);
TestConverterType("utf-7", UCNV_UTF7);
TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
static void
TestAmbiguousConverter(UConverter *cnv) {
- static const char inBytes[2]={ 0x61, 0x5c };
+ static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
UChar outUnicode[20]={ 0, 0, 0, 0 };
const char *s;
UErrorCode errorCode;
UBool isAmbiguous;
- /* try to convert an 'a' and a US-ASCII backslash */
+ /* try to convert an 'a', a square bracket and a US-ASCII backslash */
errorCode=U_ZERO_ERROR;
s=inBytes;
u=outUnicode;
- ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
if(U_FAILURE(errorCode)) {
/* we do not care about general failures in this test; the input may just not be mappable */
return;
}
- if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
- /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
+ if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
+ /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
+ /* There are some encodings that are partially ASCII based,
+ like the ISO-7 and GSM series of codepages, which we ignore. */
return;
}
isAmbiguous=ucnv_isAmbiguous(cnv);
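- /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
+ /* check that outUnicode[2]!=0x5c (the converted backslash byte) is exactly the same as ucnv_isAmbiguous() */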
- if((outUnicode[1]!=0x5c)!=isAmbiguous) {
+ if((outUnicode[2]!=0x5c)!=isAmbiguous) {
log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
- ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
+ ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
return;
}
- if(outUnicode[1]!=0x5c) {
+ if(outUnicode[2]!=0x5c) {
/* needs fixup, fix it */
ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
- if(outUnicode[1]!=0x5c) {
+ if(outUnicode[2]!=0x5c) {
/* the fix failed */
log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
return;
{
UErrorCode status = U_ZERO_ERROR;
UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
- const char target[] = {
+ static const char target[] = {
/* "\\usr\\local\\share\\data\\icutest.txt" */
0x5c, 0x75, 0x73, 0x72,
0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
0
};
UChar asciiResult[200], sjisResult[200];
- int32_t asciiLength = 0, sjisLength = 0, i;
+ int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
const char *name;
/* enumerate all converters */
}
}
+#if !UCONFIG_NO_LEGACY_CONVERSION
sjis_cnv = ucnv_open("ibm-943", &status);
if (U_FAILURE(status))
{
return;
}
/* convert target from SJIS to Unicode */
- sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
+ sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
if (U_FAILURE(status))
{
log_err("Failed to convert the SJIS string.\n");
return;
}
/* convert target from Latin-1 to Unicode */
- asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
+ /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
if (U_FAILURE(status))
{
log_err("Failed to convert the Latin-1 string.\n");
- free(sjisResult);
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
return;
if (!ucnv_isAmbiguous(sjis_cnv))
{
log_err("SJIS converter should contain ambiguous character mappings.\n");
- free(sjisResult);
- free(asciiResult);
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
return;
}
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
+#endif
}
static void
"\x2B\x2F\x76\x38\x41", /* UTF-7 */
"\x2B\x2F\x76\x39\x41", /* UTF-7 */
"\x2B\x2F\x76\x2B\x41", /* UTF-7 */
- "\x2B\x2F\x76\x2F\x41" /* UTF-7 */
+ "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
+
+ "\xDD\x73\x66\x73" /* UTF-EBCDIC */
};
static const char* expected[] = {
"UTF-16BE",
"UTF-7",
"UTF-7",
"UTF-7",
- "UTF-7"
+ "UTF-7",
+ "UTF-EBCDIC"
};
static const int32_t expectedLength[] ={
2,
4,
4,
4,
+ 4,
4
};
int i=0;
int32_t signatureLength = -1;
const char* source = NULL;
const char* enc = NULL;
- for( ; i<sizeof(data)/sizeof(char*); i++){
+ for( ; i<UPRV_LENGTHOF(data); i++){
err = U_ZERO_ERROR;
source = data[i];
enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
int32_t sourceLength=-1;
const char* source = NULL;
const char* enc = NULL;
- for( ; i<sizeof(data)/sizeof(char*); i++){
+ for( ; i<UPRV_LENGTHOF(data); i++){
err = U_ZERO_ERROR;
source = data[i];
sourceLength = len[i];
}
}
-void
-static TestUTF7() {
+static void TestUTF7() {
/* test input */
static const uint8_t in[]={
/* H - +Jjo- - ! +- +2AHcAQ */
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x48,
1, 0x2d,
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("UTF-7", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
+ log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "UTF-7");
ucnv_close(cnv);
}
-void
-static TestIMAP() {
+static void TestIMAP() {
/* test input */
static const uint8_t in[]={
/* H - &Jjo- - ! &- &2AHcAQ- \ */
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x48,
1, 0x2d,
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
+ log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
ucnv_close(cnv);
}
-void
-static TestUTF8() {
+static void TestUTF8() {
/* test input */
static const uint8_t in[]={
0x61,
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
2, 0x80,
};
/* expected error test results */
- static const uint32_t results2[]={
+ static const int32_t results2[]={
/* number of bytes read, code point */
1, 0x61,
22, 0x62
ucnv_close(cnv);
}
-void
-static TestCESU8() {
+static void TestCESU8() {
/* test input */
static const uint8_t in[]={
0x61,
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
2, 0x80,
3, 0x800,
6, 0x10000,
3, 0xdc01,
- 3, 0xd802,
- 6, 0x10ffff,
+ -1,0xd802, /* may read 3 or 6 bytes */
+ -1,0x10ffff,/* may read 0 or 3 bytes */
3, 0xfffc
};
};
/* expected error test results */
- static const uint32_t results2[]={
+ static const int32_t results2[]={
/* number of bytes read, code point */
1, 0x61,
34, 0x62
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("CESU-8", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "CESU-8");
ucnv_close(cnv);
}
-void
-static TestUTF16() {
+static void TestUTF16() {
/* test input */
static const uint8_t in1[]={
0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
};
/* expected test results */
- static const uint32_t results1[]={
+ static const int32_t results1[]={
/* number of bytes read, code point */
4, 0x4e00,
2, 0xfeff
};
- static const uint32_t results2[]={
+ static const int32_t results2[]={
/* number of bytes read, code point */
4, 0x004e,
2, 0xfffe
};
- static const uint32_t results3[]={
+ static const int32_t results3[]={
/* number of bytes read, code point */
2, 0xfefe,
2, 0x4e00,
ucnv_close(cnv);
}
-void
-static TestUTF16BE() {
+static void TestUTF16BE() {
/* test input */
static const uint8_t in[]={
0x00, 0x61,
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
2, 0x61,
2, 0xc0,
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2[]={0x61};
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
}
+#if 0
+ /*
+ * Test disabled because currently the UTF-16BE/LE converters are supposed
+ * to not set errors for unpaired surrogates.
+ * This may change with
+ * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
+ */
+
/*Test for the condition where there is a surrogate pair*/
{
const uint8_t source2[]={0xd8, 0x01};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
}
+#endif
ucnv_close(cnv);
}
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
2, 0x61,
2, 0x31,
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2[]={0x61};
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
}
+#if 0
+ /*
+ * Test disabled because currently the UTF-16BE/LE converters are supposed
+ * to not set errors for unpaired surrogates.
+ * This may change with
+ * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
+ */
+
/*Test for the condition where there is a surrogate character*/
{
static const uint8_t source2[]={0x01, 0xd8};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
}
+#endif
ucnv_close(cnv);
}
-void
-static TestUTF32() {
+static void TestUTF32() {
/* test input */
static const uint8_t in1[]={
0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
};
/* expected test results */
- static const uint32_t results1[]={
+ static const int32_t results1[]={
/* number of bytes read, code point */
8, 0x100f00,
4, 0xfeff
};
- static const uint32_t results2[]={
+ static const int32_t results2[]={
/* number of bytes read, code point */
8, 0x0f1000,
4, 0xfffe
};
- static const uint32_t results3[]={
+ static const int32_t results3[]={
/* number of bytes read, code point */
4, 0xfefe,
4, 0x100f00,
- 4, 0xd840,
- 4, 0xdc01
+ 4, 0xfffd, /* unmatched surrogate */
+ 4, 0xfffd /* unmatched surrogate */
};
const char *source, *limit;
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("UTF-32", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
return;
}
/* test input */
static const uint8_t in[]={
0x00, 0x00, 0x00, 0x61,
+ 0x00, 0x00, 0x30, 0x61,
0x00, 0x00, 0xdc, 0x00,
0x00, 0x00, 0xd8, 0x00,
0x00, 0x00, 0xdf, 0xff,
- 0x00, 0x00, 0xff, 0xfd,
+ 0x00, 0x00, 0xff, 0xfe,
0x00, 0x10, 0xab, 0xcd,
0x00, 0x10, 0xff, 0xff
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
4, 0x61,
- 4, 0xdc00,
- 4, 0xd800,
- 4, 0xdfff,
+ 4, 0x3061,
+ 4, 0xfffd,
4, 0xfffd,
+ 4, 0xfffd,
+ 4, 0xfffe,
4, 0x10abcd,
4, 0x10ffff
};
};
/* expected error test results */
- static const uint32_t results2[]={
+ static const int32_t results2[]={
/* number of bytes read, code point */
4, 0x61,
8, 0x62,
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "UTF-32BE");
/* test input */
static const uint8_t in[]={
0x61, 0x00, 0x00, 0x00,
+ 0x61, 0x30, 0x00, 0x00,
0x00, 0xdc, 0x00, 0x00,
0x00, 0xd8, 0x00, 0x00,
0xff, 0xdf, 0x00, 0x00,
- 0xfd, 0xff, 0x00, 0x00,
+ 0xfe, 0xff, 0x00, 0x00,
0xcd, 0xab, 0x10, 0x00,
0xff, 0xff, 0x10, 0x00
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
4, 0x61,
- 4, 0xdc00,
- 4, 0xd800,
- 4, 0xdfff,
+ 4, 0x3061,
+ 4, 0xfffd,
4, 0xfffd,
+ 4, 0xfffd,
+ 4, 0xfffe,
4, 0x10abcd,
4, 0x10ffff
};
};
/* expected error test results */
- static const uint32_t results2[]={
+ static const int32_t results2[]={
/* number of bytes read, code point */
4, 0x61,
8, 0x62,
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "UTF-32LE");
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
1, 0x31,
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "LATIN_1");
/* test input */
static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
1, 0xbf,
const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
UErrorCode errorCode=U_ZERO_ERROR;
- UConverter *cnv=ucnv_open("ibm-1281", &errorCode);
+ UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
if(U_FAILURE(errorCode)) {
- log_data_err("Unable to open a SBCS(ibm-1281) converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
return;
}
- TestNextUChar(cnv, source, limit, results, "SBCS(ibm-1281)");
+ TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
/* Test the condition when source >= sourceLimit */
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
/*Test for Illegal character */ /*
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
2, 0x00a7,
2, 0xe1d2,
TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
/* Test the condition when source >= sourceLimit */
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
- /*Test for the condition where we have a truncated char*/
- {
- static const uint8_t source1[]={0xc4};
- TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
- }
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2[]={0x1a, 0x1b};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
}
+ /*Test for the condition where we have a truncated char*/
+ {
+ static const uint8_t source1[]={0xc4};
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
+ TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
+ }
ucnv_close(cnv);
}
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x0001,
2, 0x250c,
TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
/* Test the condition when source >= sourceLimit */
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
+ /*Test for the condition where there is an invalid character*/
+ {
+ static const uint8_t source2[]={0xa1, 0x80};
+ TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
+ }
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1[]={0xc4};
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
}
- /*Test for the condition where there is an invalid character*/
+ ucnv_close(cnv);
+
+}
+
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
+static void
+TestICCRunout() {
+/* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
+
+ const char *cnvName = "ibm-1363";
+ UErrorCode status = U_ZERO_ERROR;
+ const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
+ /* UChar expectUData[] = { 0x00a1, 0x001a }; */
+ const char *source = sourceData;
+ const char *sourceLim = sourceData+sizeof(sourceData);
+ UChar c1, c2, c3;
+ UConverter *cnv=ucnv_open(cnvName, &status);
+ if(U_FAILURE(status)) {
+ log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
+ return;
+ }
+
+#if 0
{
- static const uint8_t source2[]={0xa1, 0x01};
- TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
+ UChar targetBuf[256];
+ UChar *target = targetBuf;
+ UChar *targetLim = target+256;
+ ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
+
+ log_info("After convert: target@%d, source@%d, status%s\n",
+ target-targetBuf, source-sourceData, u_errorName(status));
+
+ if(U_FAILURE(status)) {
+ log_err("Failed to convert: %s\n", u_errorName(status));
+ } else {
+
}
- ucnv_close(cnv);
+ }
+#endif
+
+ c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
+ log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
+
+ c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
+ log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
+ c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
+ log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
+
+ if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
+ log_verbose("OK\n");
+ } else {
+ log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
+ }
+
+ ucnv_close(cnv);
+
}
+#endif
+
+#ifdef U_ENABLE_GENERIC_ISO_2022
static void
TestISO_2022() {
/* test input */
static const uint8_t in[]={
- 0x1b, 0x25, 0x42, 0x31,
+ 0x1b, 0x25, 0x42,
+ 0x31,
0x32,
0x61,
0xc2, 0x80,
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
- 4, 0x0031,
+ 4, 0x0031, /* 4 bytes including the escape sequence */
1, 0x0032,
1, 0x61,
2, 0x80,
3, 0x800,
- 4, 0x10000,
-
+ 4, 0x10000
};
const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1[]={0xc4};
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
}
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2[]={0xa1, 0x01};
- TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
+ TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
}
ucnv_close(cnv);
}
+#endif
+
static void
TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
const UChar* uSource;
char *cTarget;
const char *cTargetLimit;
char *cBuf;
- UChar *uBuf,*test;
+ UChar *uBuf; /*,*test;*/
int32_t uBufSize = 120;
int len=0;
int i=2;
}while(cSource<cSourceLimit);
uSource = source;
- test =uBuf;
+ /*test =uBuf;*/
for(len=0;len<(int)(source - sourceLimit);len++){
if(uBuf[len]!=uSource[len]){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
uTarget = uBuf;
uTargetLimit = uBuf+ uBufSize*5;
ucnv_reset(cnv);
- numCharsInTarget=ucnv_fromUChars( cnv , cTarget, (cTargetLimit-cTarget),uSource,(uSourceLimit-uSource), &errorCode);
+ numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
if(U_FAILURE(errorCode)){
log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
cSource = cBuf;
test =uBuf;
- ucnv_toUChars(cnv,uTarget,(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
+ ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
if(U_FAILURE(errorCode)){
log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
return;
char *cTarget;
const char *cTargetLimit;
char *cBuf;
- UChar *uBuf,*test;
+ UChar *uBuf; /*,*test;*/
int32_t uBufSize = 120;
int len=0;
int i=2;
}while(cSource<cTarget);
uSource = source;
- test =uBuf;
+ /*test =uBuf;*/
for(;len<(int)(source - sourceLimit);len++){
if(uBuf[len]!=uSource[len]){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
static void
TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
const uint16_t results[], const char* message){
- const char* s0;
+/* const char* s0; */
const char* s=(char*)source;
const uint16_t *r=results;
UErrorCode errorCode=U_ZERO_ERROR;
uint32_t c,exC;
ucnv_reset(cnv);
while(s<limit) {
- s0=s;
+ /* s0=s; */
c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
break; /* no more significant input */
log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
break;
} else {
- if(UTF_IS_FIRST_SURROGATE(*r)){
+ if(U16_IS_LEAD(*r)){
int i =0, len = 2;
- UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
+ U16_NEXT(r, i, len, exC);
r++;
}else{
exC = *r;
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
test++;
}
TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
TestJitterbug930("csISO2022JP");
ucnv_close(cnv);
free(offsets);
0xEF, 0x30,
};
- testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
+ testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
}
0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
- 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+ 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
0x201D, 0x3014, 0x000D, 0x000A,
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
int32_t* myOff= offsets;
cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
if(U_FAILURE(errorCode)) {
- log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
return;
}
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
test++;
}
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
- TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
TestJitterbug930("csISO2022JP");
ucnv_close(cnv);
free(uBuf);
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
cBuf =(char*)malloc(uBufSize * sizeof(char));
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
uSourceLimit=uSource+len;
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize;
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
uSource++;
test++;
}
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[len],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[len],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
+ TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
if(byteArr && byteArrLen!=0){
TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
- TestToAndFromUChars(&in[0],(const UChar*)&in[len],cnv);
+ TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
{
cSource = byteArr;
cSourceLimit = cSource+byteArrLen;
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
return 0;
}
if(srcLen==-1){
- srcLen = uprv_strlen(src);
+ srcLen = (int32_t)uprv_strlen(src);
}
for (; srcIndex<srcLen; ) {
}
if(dstIndex < dstLen){
if(c>0xFFFF){
- dst[dstIndex++] = UTF16_LEAD(c);
+ dst[dstIndex++] = U16_LEAD(c);
if(dstIndex<dstLen){
- dst[dstIndex]=UTF16_TRAIL(c);
+ dst[dstIndex]=U16_TRAIL(c);
}else{
*status=U_BUFFER_OVERFLOW_ERROR;
}
usource[0] =(UChar) i;
len=1;
}else{
- usource[0]=UTF16_LEAD(i);
- usource[1]=UTF16_TRAIL(i);
+ usource[0]=U16_LEAD(i);
+ usource[1]=U16_TRAIL(i);
len=2;
}
ulen=len;
static void
TestRoundTrippingAllUTF(void){
- if(!QUICK){
+ if(!getTestOption(QUICK_OPTION)){
log_verbose("Running exhaustive round trip test for BOCU-1\n");
TestFullRoundtrip("BOCU-1");
log_verbose("Running exhaustive round trip test for SCSU\n");
TestFullRoundtrip("UTF-7,version=1");
log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
TestFullRoundtrip("IMAP-mailbox-name");
- log_verbose("Running exhaustive round trip test for GB18030\n");
- TestFullRoundtrip("GB18030");
+ /*
+ *
+ * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
+ * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
+ * The old mappings remain as fallbacks.
+ * This test may be reintroduced at a later time.
+ *
+ * 110118 - mow
+ */
+ /*
+ log_verbose("Running exhaustive round trip test for GB18030\n");
+ TestFullRoundtrip("GB18030");
+ */
}
}
"\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
};
int i=0;
- for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
+ for(;i<UPRV_LENGTHOF(fTestCases);i++){
const char* cSrc = fTestCases[i];
UErrorCode status = U_ZERO_ERROR;
int32_t cSrcLen,srcLen;
UChar* src;
/* UConverter* cnv = ucnv_open("SCSU",&status); */
- cSrcLen= srcLen = uprv_strlen(fTestCases[i]);
+ cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
}
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestJitterbug2346(){
char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
}
+
static void
TestISO_2022_JP_1() {
/* test input */
0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
- 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+ 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
0x201D, 0x000D, 0x000A,
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
static const uint8_t source2[]={0x0e,0x24,0x053};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
}
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
ucnv_close(cnv);
free(uBuf);
free(cBuf);
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
uSource++;
test++;
}
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
/*Test for the condition where there is an invalid character*/
ucnv_reset(cnv);
{
TestISO_2022_KR() {
/* test input */
static const uint16_t in[]={
- 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
- ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
+ 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
+ ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
- ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
+ ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
,0x53E3,0x53E4,0x000A,0x000D};
const UChar* uSource;
const UChar* uSourceLimit;
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
test++;
}
TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
TestJitterbug930("csISO2022KR");
/*Test for the condition where there is an invalid character*/
ucnv_reset(cnv);
{
static const uint8_t source2[]={0x1b,0x24,0x053};
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
}
ucnv_close(cnv);
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
}
ucnv_reset(cnv);
TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
ucnv_reset(cnv);
- TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
/*Test for the condition where there is an invalid character*/
ucnv_reset(cnv);
{
static const uint8_t source2[]={0x1b,0x24,0x053};
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
}
ucnv_close(cnv);
}
static void TestJitterbug2411(){
- const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
+ static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
"\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
UConverter* kr=NULL, *kr1=NULL;
UErrorCode errorCode = U_ZERO_ERROR;
static void
TestJIS(){
- /* From Unicode */
- {
- /* JIS Encoding */
- UChar sampleTextJIS[] ={
- 0xFF81, 0xFF82,
- 0x30EC, 0x30ED,
- 0x30EE, 0x30EF,
- 0xFF93, 0xFF94,
- 0xFF95, 0xFF96,
- 0xFF97, 0xFF98
- };
- const uint8_t expectedISO2022JIS[] ={
- 0x1b, 0x24, 0x42,
- 0x25, 0x41, 0x25, 0x44,
- 0x25, 0x6c, 0x25, 0x6d,
- 0x25, 0x6e, 0x25, 0x6F,
- 0x25, 0x62, 0x25, 0x64,
- 0x25, 0x66, 0x25, 0x68,
- 0x25, 0x69, 0x25, 0x6a
-
- };
- int32_t fmISO2022JISOffs[] ={
- 0,0,0,
- 0,0,1,1,
- 2,2,3,3,
- 4,4,5,5,
- 6,6,7,7,
- 8,8,9,9,
- 10,10,11,11
-
- };
-
- /* JIS7 Encoding */
- const uint8_t expectedISO2022JIS7[] ={
- 0x1b, 0x28, 0x49,
- 0x41, 0x42,
- 0x1b, 0x24, 0x42,
- 0x25, 0x6c, 0x25, 0x6d,
- 0x25, 0x6e, 0x25, 0x6F,
- 0x1b, 0x28, 0x49,
- 0x53, 0x54,
- 0x55, 0x56,
- 0x57, 0x58
-
- };
- int32_t fmISO2022JIS7Offs[] ={
- 0,0,0,
- 0,1,
- 2,2,2,
- 2,2,3,3,
- 4,4,5,5,
- 6,6,6,
- 6,7,
- 8,9,
- 10,11
-
- };
-
- /* JIS8 Encoding */
- const uint8_t expectedISO2022JIS8[] ={
- 0x1b, 0x28, 0x4A,
- 0xC1, 0xC2,
- 0x1b, 0x24, 0x42,
- 0x25, 0x6c, 0x25, 0x6d,
- 0x25, 0x6e, 0x25, 0x6F,
- 0x1b, 0x28, 0x4A,
- 0xD3, 0xD4,
- 0xD5, 0xD6,
- 0xD7, 0xD8
-
- };
- int32_t fmISO2022JIS8Offs[] ={
- 0,0,0,
- 0,1,
- 2,2,2,
- 2,2,3,3,
- 4,4,5,5,
- 6,6,6,
- 6,7,
- 8,9,
- 10,11
-
- };
- testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
- expectedISO2022JIS, sizeof(expectedISO2022JIS), "JIS", fmISO2022JISOffs,TRUE );
- testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
- expectedISO2022JIS7, sizeof(expectedISO2022JIS7), "JIS7", fmISO2022JIS7Offs,FALSE );
- testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
- expectedISO2022JIS8, sizeof(expectedISO2022JIS8), "JIS8", fmISO2022JIS8Offs,FALSE );
-
-
- }
+ /* From Unicode moved to testdata/conversion.txt
+ *
+ * What remains in TestJIS is the to-Unicode direction only: for each of the
+ * names "JIS", "JIS7" and "JIS8" a byte sample, the expected UTF-16 units and
+ * a table of expected offsets are handed to testConvertToU().
+ * The tables are static const, so they are built once rather than on each call.
+ */
/*To Unicode*/
{
- const uint8_t sampleTextJIS[] = {
+ static const uint8_t sampleTextJIS[] = {
0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
};
- const uint16_t expectedISO2022JIS[] = {
+ static const uint16_t expectedISO2022JIS[] = {
0x0041, 0x0042,
0xFF81, 0xFF82,
0x3000
};
- int32_t toISO2022JISOffs[]={
+ static const int32_t toISO2022JISOffs[]={
3,4,
8,9,
16
};
- const uint8_t sampleTextJIS7[] = {
+ static const uint8_t sampleTextJIS7[] = {
0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
0x1b,0x24,0x42,0x21,0x21,
0x21,0x22,
0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
};
- const uint16_t expectedISO2022JIS7[] = {
+ static const uint16_t expectedISO2022JIS7[] = {
0x0041, 0x0042,
0xFF81, 0xFF82,
0x3000,
0x3001,
0x3000
};
- int32_t toISO2022JIS7Offs[]={
+ static const int32_t toISO2022JIS7Offs[]={
3,4,
8,9,
13,16,
17,
19,27
};
- const uint8_t sampleTextJIS8[] = {
+ static const uint8_t sampleTextJIS8[] = {
0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
0xa1,0xc8,0xd9,/*Katakana Set*/
0x1b,0x28,0x42,
0xb1,0xc3, /*Katakana Set*/
0x1b,0x24,0x42,0x21,0x21
};
- const uint16_t expectedISO2022JIS8[] = {
+ static const uint16_t expectedISO2022JIS8[] = {
0x0041, 0x0042,
0xff61, 0xff88, 0xff99,
0x0041, 0x0042,
0xff71, 0xff83,
0x3000
};
- int32_t toISO2022JIS8Offs[]={
+ static const int32_t toISO2022JIS8Offs[]={
3, 4, 5, 6,
7, 11, 12, 13,
14, 18,
};
testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
- sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
+ UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
- sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
+ UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
- sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
+ UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
}
}
+
+#if 0
+ ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
+
static void TestJitterbug915(){
/* tests for roundtripping of the below sequence
\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
*/
- static char cSource[]={
+ static const char cSource[]={
0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
- 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
+ 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
- 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
+ 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
- 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
+ 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
- 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
+ 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
- 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
+ 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
- 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
- 0x37, 0x20, 0x2A, 0x2F,
+ 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
+ 0x37, 0x20, 0x2A, 0x2F
};
UChar uTarget[500]={'\0'};
UChar* utarget=uTarget;
char* ctarget=cTarget;
char* ctargetLimit=cTarget+sizeof(cTarget);
const char* csource=cSource;
- char* tempSrc = cSource;
+ const char* tempSrc = cSource;
UErrorCode err=U_ZERO_ERROR;
UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
ctargetLimit=ctarget;
ctarget =cTarget;
while(ctarget<ctargetLimit){
- if(*(ctarget++) != *(tempSrc++)){
- log_err("Expected : \\x%02X \t Got: \\x%02X\n",*ctarget,(int)*tempSrc) ;
+ if(*ctarget != *tempSrc){
+ log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
}
+ ++ctarget;
+ ++tempSrc;
}
ucnv_close(conv);
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
uSource++;
test++;
}
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
/*Test for the condition where there is an invalid character*/
ucnv_reset(cnv);
{
free(cBuf);
free(offsets);
}
+#endif
static void
TestISO_2022_CN() {
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
- uSource = (const UChar*)&in[0];
- uSourceLimit=(const UChar*)&in[sizeof(in)/2];
+ uSource = (const UChar*)in;
+ uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
cTarget = cBuf;
cTargetLimit = cBuf +uBufSize*5;
uTarget = uBuf;
log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
return;
}
- uSource = (const UChar*)&in[0];
+ uSource = (const UChar*)in;
while(uSource<uSourceLimit){
if(*test!=*uSource){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
test++;
}
TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
- TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
- TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
+ TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
+ TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
TestJitterbug930("csISO2022CN");
/*Test for the condition where there is an invalid character*/
ucnv_reset(cnv);
free(offsets);
}
+/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR
+ *
+ * EmptySegmentTest is one row of the table driven by TestJitterbug6175: the
+ * converter to open and the raw bytes to push through it.
+ */
+typedef struct {
+ const char * converterName; /* handed to ucnv_open(); a NULL name marks the end of the table */
+ const char * inputText; /* bytes to convert; the test arrays are not NUL-terminated */
+ int inputTextLength; /* byte count of inputText (filled in with sizeof in the table) */
+} EmptySegmentTest;
+
+/* Callback for TestJitterbug6175, should only get called for empty segment errors
+ *
+ * Behaviour, in order:
+ *  - a reason numerically greater than UCNV_IRREGULAR returns immediately and
+ *    leaves *err untouched;
+ *  - any other reason that is not UCNV_IRREGULAR is reported with log_err();
+ *  - in both remaining cases *err is cleared and ucnv_cbToUWriteSub() is
+ *    called with offset index 0.
+ * The context, codeUnits and length parameters are not read.
+ */
+static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
+ int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
+ if (reason > UCNV_IRREGULAR) {
+ return; /* NOTE(review): presumably the reset/close/clone notifications - confirm against ucnv_err.h */
+ }
+ if (reason != UCNV_IRREGULAR) {
+ log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
+ }
+ /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE: clear the error, then write the substitution */
+ *err = U_ZERO_ERROR;
+ ucnv_cbToUWriteSub(toArgs,0,err);
+}
+
+enum { kEmptySegmentToUCharsMax = 64 }; /* capacity, in UChars, of the output buffer used by TestJitterbug6175 */
+static void TestJitterbug6175(void) { /* runs each emptySegmentTests row through ucnv_toUnicode() with UCNV_TO_U_CALLBACK_EMPTYSEGMENT installed */
+ static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
+ static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
+ static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
+ static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
+ static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
+ static const EmptySegmentTest emptySegmentTests[] = {
+ /* converterName inputText inputTextLength */
+ { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
+ { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
+ { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
+ { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
+ { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
+ /* terminator: the loop below stops at the first NULL converterName */
+ { NULL, NULL, 0, }
+ };
+ const EmptySegmentTest * testPtr;
+ for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
+ UErrorCode err = U_ZERO_ERROR;
+ UConverter * cnv = ucnv_open(testPtr->converterName, &err);
+ if (U_FAILURE(err)) {
+ log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
+ return; /* abandons the remaining table rows too, not just this one */
+ }
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
+ if (U_FAILURE(err)) {
+ log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
+ ucnv_close(cnv);
+ return;
+ }
+ {
+ UChar toUChars[kEmptySegmentToUCharsMax];
+ UChar * toUCharsPtr = toUChars;
+ const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
+ const char * inCharsPtr = testPtr->inputText;
+ const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
+ ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); /* no offsets requested (NULL), flush=TRUE; err and the converted text are not examined afterwards - the only checks are those made inside the callback */
+ }
+ ucnv_close(cnv);
+ }
+}
+
static void
TestEBCDIC_STATEFUL() {
/* test input */
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x002f,
1, 0x0092,
};
/* expected test results */
- static const uint32_t results2[]={
+ static const int32_t results2[]={
/* number of bytes read, code point */
2, 0x203E,
1, 0x0001,
};
/* expected test results */
- static const uint32_t results[]={
+ static const int32_t results[]={
/* number of bytes read, code point */
1, 0x24,
1, 0x7f,
{
UErrorCode errorCode=U_ZERO_ERROR;
- const uint8_t * pSource = pszLMBCS;
- const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
+ const char * pSource = (const char *)pszLMBCS;
+ const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
UChar Out [sizeof(pszUnicode) + 1];
UChar * pOut = Out;
- UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
+ UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
int32_t off [sizeof(offsets)];
/* last 'offset' in expected results is just the final size.
(Makes other tests easier). Compensate here: */
- off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
+ off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
ucnv_toUnicode (cnv,
&pOut,
OutLimit,
- (const char **)&pSource,
- (const char *)sourceLimit,
+ &pSource,
+ sourceLimit,
off,
TRUE,
&errorCode);
const UChar * pUniOut = uniString;
UChar * pUniIn = uniString;
uint8_t lmbcsString [4];
- const uint8_t * pLMBCSOut = lmbcsString;
- uint8_t * pLMBCSIn = lmbcsString;
+ const char * pLMBCSOut = (const char *)lmbcsString;
+ char * pLMBCSIn = (char *)lmbcsString;
/* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
ucnv_fromUnicode (cnv16he,
- (char **)&pLMBCSIn, (const char *)(pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
- &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
+ &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
+ &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
NULL, 1, &errorCode);
if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
log_err("LMBCS-16,locale=he gives unexpected translation\n");
}
- pLMBCSIn=lmbcsString;
+ pLMBCSIn= (char *)lmbcsString;
pUniOut = uniString;
ucnv_fromUnicode (cnv01us,
- (char **)&pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
- &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
+ &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
+ &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
NULL, 1, &errorCode);
if (lmbcsString[0] != 0x9F)
/* single byte char from mbcs char set */
lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
- pLMBCSOut = lmbcsString;
+ pLMBCSOut = (const char *)lmbcsString;
pUniIn = uniString;
ucnv_toUnicode (cnv16jp,
&pUniIn, pUniIn + 1,
- (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 1),
+ &pLMBCSOut, (pLMBCSOut + 1),
NULL, 1, &errorCode);
- if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
+ if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
{
log_err("Unexpected results from LMBCS-16 single byte char\n");
}
/* convert to group 1: should be 3 bytes */
- pLMBCSIn = lmbcsString;
+ pLMBCSIn = (char *)lmbcsString;
pUniOut = uniString;
ucnv_fromUnicode (cnv01us,
- (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 3),
+ &pLMBCSIn, (const char *)(pLMBCSIn + 3),
&pUniOut, pUniOut + 1,
NULL, 1, &errorCode);
- if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+3 || pUniOut != uniString+1
+ if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
|| lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
{
log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
}
- pLMBCSOut = lmbcsString;
+ pLMBCSOut = (const char *)lmbcsString;
pUniIn = uniString;
ucnv_toUnicode (cnv01us,
&pUniIn, pUniIn + 1,
- (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 3),
+ &pLMBCSOut, (const char *)(pLMBCSOut + 3),
NULL, 1, &errorCode);
- if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
+ if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
{
log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
}
- pLMBCSIn = lmbcsString;
+ pLMBCSIn = (char *)lmbcsString;
pUniOut = uniString;
ucnv_fromUnicode (cnv16jp,
- (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 1),
+ &pLMBCSIn, (const char *)(pLMBCSIn + 1),
&pUniOut, pUniOut + 1,
NULL, 1, &errorCode);
- if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
+ if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
{
log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
}
UErrorCode errorCode=U_ZERO_ERROR;
- const uint8_t * pSource = pszLMBCS;
- const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
+ const char * pSource = (const char *)pszLMBCS;
+ const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
int codepointCount = 0;
UChar Out [sizeof(pszUnicode) + 1];
UChar * pOut = Out;
- UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
+ UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
ucnv_toUnicode (cnv,
&pOut,
OutLimit,
- (const char **)&pSource,
- (const char *)(pSource+1), /* claim that this is a 1- byte buffer */
+ &pSource,
+ (pSource+1), /* claim that this is a 1- byte buffer */
NULL,
FALSE, /* FALSE means there might be more chars in the next buffer */
&errorCode);
if (U_SUCCESS (errorCode))
{
- if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1])
+ if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
{
/* we are on to the next code point: check value */
}
{
/* limits & surrogate error testing */
- uint8_t LIn [sizeof(pszLMBCS)];
- const uint8_t * pLIn = LIn;
+ char LIn [sizeof(pszLMBCS)];
+ const char * pLIn = LIn;
char LOut [sizeof(pszLMBCS)];
char * pLOut = LOut;
errorCode=U_ZERO_ERROR;
/* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
- ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
+ pUIn++;
+ ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
}
+ pUIn--;
+
errorCode=U_ZERO_ERROR;
ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
}
/*0 byte source request - GetNextUChar : error & value == fffe or ffff */
uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
- if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
+ if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
{
log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
}
/* running out of target room : U_BUFFER_OVERFLOW_ERROR */
pUIn = pszUnicode;
- ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
+ ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
{
log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
errorCode = U_ZERO_ERROR;
- pLIn = pszLMBCS;
- ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
- if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4])
+ pLIn = (const char *)pszLMBCS;
+ ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
+ if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
{
log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
}
/* unpaired or chopped LMBCS surrogates */
/* OK high surrogate, Low surrogate is chopped */
- LIn [0] = 0x14;
- LIn [1] = 0xD8;
- LIn [2] = 0x01;
- LIn [3] = 0x14;
- LIn [4] = 0xDC;
+ LIn [0] = (char)0x14;
+ LIn [1] = (char)0xD8;
+ LIn [2] = (char)0x01;
+ LIn [3] = (char)0x14;
+ LIn [4] = (char)0xDC;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
- ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
+ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
+ ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
{
log_err("Unexpected results on chopped low surrogate\n");
}
/* chopped at surrogate boundary */
- LIn [0] = 0x14;
- LIn [1] = 0xD8;
- LIn [2] = 0x01;
+ LIn [0] = (char)0x14;
+ LIn [1] = (char)0xD8;
+ LIn [2] = (char)0x01;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
- ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
{
log_err("Unexpected results on chopped at surrogate boundary \n");
}
/* unpaired surrogate plus valid Unichar */
- LIn [0] = 0x14;
- LIn [1] = 0xD8;
- LIn [2] = 0x01;
- LIn [3] = 0x14;
- LIn [4] = 0xC9;
- LIn [5] = 0xD0;
+ LIn [0] = (char)0x14;
+ LIn [1] = (char)0xD8;
+ LIn [2] = (char)0x01;
+ LIn [3] = (char)0x14;
+ LIn [4] = (char)0xC9;
+ LIn [5] = (char)0xD0;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
- ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
{
log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
}
/* unpaired surrogate plus chopped Unichar */
- LIn [0] = 0x14;
- LIn [1] = 0xD8;
- LIn [2] = 0x01;
- LIn [3] = 0x14;
- LIn [4] = 0xC9;
+ LIn [0] = (char)0x14;
+ LIn [1] = (char)0xD8;
+ LIn [2] = (char)0x01;
+ LIn [3] = (char)0x14;
+ LIn [4] = (char)0xC9;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
- ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
{
log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
}
/* unpaired surrogate plus valid non-Unichar */
- LIn [0] = 0x14;
- LIn [1] = 0xD8;
- LIn [2] = 0x01;
- LIn [3] = 0x0F;
- LIn [4] = 0x3B;
+ LIn [0] = (char)0x14;
+ LIn [1] = (char)0xD8;
+ LIn [2] = (char)0x01;
+ LIn [3] = (char)0x0F;
+ LIn [4] = (char)0x3B;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
- ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
{
log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
}
/* unpaired surrogate plus chopped non-Unichar */
- LIn [0] = 0x14;
- LIn [1] = 0xD8;
- LIn [2] = 0x01;
- LIn [3] = 0x0F;
+ LIn [0] = (char)0x14;
+ LIn [1] = (char)0xD8;
+ LIn [2] = (char)0x01;
+ LIn [3] = (char)0x0F;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
- ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
{
static void TestJitterbug255()
{
- const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
- const uint8_t *testBuffer = testBytes;
- const uint8_t *testEnd = testBytes + sizeof(testBytes);
+ static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
+ const char *testBuffer = (const char *)testBytes;
+ const char *testEnd = (const char *)testBytes + sizeof(testBytes);
UErrorCode status = U_ZERO_ERROR;
- UChar32 result;
+ /*UChar32 result;*/
UConverter *cnv = 0;
cnv = ucnv_open("shift-jis", &status);
}
while (testBuffer != testEnd)
{
- result = ucnv_getNextUChar (cnv, (const char **)&testBuffer, (const char *)testEnd , &status);
+ /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
if (U_FAILURE(status))
{
log_err("Failed to convert the next UChar for SJIS.\n");
}
ucnv_close(cnv);
}
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
#if !UCONFIG_NO_COLLATION
+/*
+ * Converts the "zh" collator's rule string to UTF-8 with ucnv_fromUChars(),
+ * raising the target capacity until the call stops overflowing, and checks
+ * that every pass reports the same required length.
+ */
static void TestJitterbug981(){
- const UChar* rules;
- int32_t rules_length, target_cap, bytes_needed;
- UErrorCode status = U_ZERO_ERROR;
- UConverter *utf8cnv;
- UCollator* myCollator;
- char buff[50000];
- int numNeeded=0;
- utf8cnv = ucnv_open ("utf8", &status);
- if(U_FAILURE(status)){
- log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
- return;
- }
- myCollator = ucol_open("zh", &status);
- if(U_FAILURE(status)){
- log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
- return;
- }
+    const UChar* rules;
+    int32_t rules_length, target_cap, bytes_needed, buff_size;
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter *utf8cnv;
+    UCollator* myCollator;
+    char *buff;
+    int numNeeded=0;
+    utf8cnv = ucnv_open ("utf8", &status);
+    if(U_FAILURE(status)){
+        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
+        return;
+    }
+    myCollator = ucol_open("zh", &status);
+    if(U_FAILURE(status)){
+        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
+        ucnv_close(utf8cnv);
+        return;
+    }
- rules = ucol_getRules(myCollator, &rules_length);
+    rules = ucol_getRules(myCollator, &rules_length);
+    if(rules_length == 0) {
+        log_data_err("missing zh tailoring rule string\n");
+        ucol_close(myCollator);
+        ucnv_close(utf8cnv);
+        return;
+    }
+    /* Size the buffer from the converter's maximum bytes per UChar. */
+    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
+    buff = malloc(buff_size);
+    if(buff == NULL) {
+        /* Without a buffer the loop below cannot test anything; fail loudly. */
+        log_err("could not allocate %d bytes for the conversion buffer\n", (int)buff_size);
+        ucol_close(myCollator);
+        ucnv_close(utf8cnv);
+        return;
+    }
- target_cap = 0;
- do {
- ucnv_reset(utf8cnv);
- status = U_ZERO_ERROR;
- bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
- rules, rules_length, &status);
- target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
- if(numNeeded!=0 && numNeeded!= bytes_needed){
- log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
- }
- numNeeded = bytes_needed;
- } while (status == U_BUFFER_OVERFLOW_ERROR);
- ucol_close(myCollator);
- ucnv_close(utf8cnv);
+    /* Start with no capacity so the first pass only reports the needed size. */
+    target_cap = 0;
+    do {
+        ucnv_reset(utf8cnv);
+        status = U_ZERO_ERROR;
+        if(target_cap >= buff_size) {
+            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
+            break;
+        }
+        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
+                                       rules, rules_length, &status);
+        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
+        /* Every pass must agree on the required length. */
+        if(numNeeded!=0 && numNeeded!= bytes_needed){
+            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes\n");
+            break;
+        }
+        numNeeded = bytes_needed;
+    } while (status == U_BUFFER_OVERFLOW_ERROR);
+    ucol_close(myCollator);
+    ucnv_close(utf8cnv);
+    free(buff);
}
#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestJitterbug1293(){
- UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
+ static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
char target[256];
UErrorCode status = U_ZERO_ERROR;
UConverter* conv=NULL;
}
ucnv_close(conv);
}
-
#endif
+
+/*
+ * Converts ISCII bytes with an "iscii-gur" converter and compares the output
+ * with the expected UChars. The input switches script in the middle
+ * (\xEF\x43) and then relies on a new line to return to the converter's
+ * default script.
+ */
+static void TestJB5275_1(){
+
+    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
+                              "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
+                              /* Switch script: */
+                              "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
+                              "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
+                              "\xEF\x40\x3B\xB3\x0A";
+    static const UChar expected[] ={
+        0x003b, 0x0a15, 0x000a, /* Easy characters */
+        0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
+        0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
+        0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
+        0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
+    };
+
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter* conv = ucnv_open("iscii-gur", &status);
+    UChar dest[100] = {'\0'};
+    UChar* target = dest;
+    const char* source = data;
+    int32_t produced;
+    int32_t i;
+
+    if (U_FAILURE(status)) {
+        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
+        return;
+    }
+
+    log_verbose("Testing switching back to default script when new line is encountered.\n");
+    ucnv_toUnicode(conv, &target, dest+UPRV_LENGTHOF(dest), &source, data+strlen(data), NULL, TRUE, &status);
+    if(U_FAILURE(status)){
+        log_err("conversion failed: %s \n", u_errorName(status));
+    }
+    /* target now points one past the last UChar written into dest. */
+    produced = (int32_t)(target-dest);
+    printUSeq(dest, produced);
+    /* A failed conversion was already reported; only flag the count on success. */
+    if(U_SUCCESS(status) && produced != UPRV_LENGTHOF(expected)){
+        log_err("got %d UChars but expected %d\n", (int)produced, (int)UPRV_LENGTHOF(expected));
+    }
+    /* Compare only the overlap so that surplus output never reads past expected[]. */
+    for(i=0; i<produced && i<UPRV_LENGTHOF(expected); i++){
+        if(expected[i]!=dest[i]){
+            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", expected[i], dest[i]);
+        }
+    }
+    ucnv_close(conv);
+}
+
+/*
+ * Converts ISCII bytes with the generic "iscii" converter, where every line
+ * starts with an \xEF script-switch sequence, and compares the output with
+ * the expected UChars.
+ */
+static void TestJB5275(){
+    static const char* data =
+    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
+    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
+    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
+        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
+        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
+        "\xEF\x48\x38\xB3\x0A" /* Kannada test */
+        "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
+        "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
+        "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
+    /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
+    static const UChar expected[] ={
+        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
+        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
+        0x0038, 0x0C95, 0x000A, /* Kannada test */
+        0x0039, 0x0D15, 0x000A, /* Malayalam test */
+        0x003A, 0x0A95, 0x000A, /* Gujarati test */
+        0x003B, 0x0A15, 0x000A, /* Punjabi test */
+    };
+
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter* conv = ucnv_open("iscii", &status);
+    UChar dest[100] = {'\0'};
+    UChar* target = dest;
+    const char* source = data;
+    int32_t produced;
+    int32_t i;
+
+    /* Report a missing converter as such instead of as a failed conversion. */
+    if (U_FAILURE(status)) {
+        log_data_err("Unable to open converter: iscii got errorCode: %s\n", u_errorName(status));
+        return;
+    }
+
+    ucnv_toUnicode(conv, &target, dest+UPRV_LENGTHOF(dest), &source, data+strlen(data), NULL, TRUE, &status);
+    if(U_FAILURE(status)){
+        log_data_err("conversion failed: %s \n", u_errorName(status));
+    }
+    /* target now points one past the last UChar written into dest. */
+    produced = (int32_t)(target-dest);
+
+    printUSeq(dest, produced);
+
+    /* A failed conversion was already reported; only flag the count on success. */
+    if(U_SUCCESS(status) && produced != UPRV_LENGTHOF(expected)){
+        log_err("got %d UChars but expected %d\n", (int)produced, (int)UPRV_LENGTHOF(expected));
+    }
+    /* Compare only the overlap so that surplus output never reads past expected[]. */
+    for(i=0; i<produced && i<UPRV_LENGTHOF(expected); i++){
+        if(expected[i]!=dest[i]){
+            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", expected[i], dest[i]);
+        }
+    }
+    ucnv_close(conv);
+}
+
+/*
+ * Checks ucnv_isFixedWidth() against a list of converters expected to be
+ * fixed width and a list expected not to be. A converter that cannot be
+ * opened is reported as a data error and skipped.
+ */
+static void
+TestIsFixedWidth() {
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter *cnv = NULL;
+    int32_t i;
+
+    const char *fixedWidth[] = {
+            "US-ASCII",
+            "UTF32",
+            "ibm-5478_P100-1995"
+    };
+
+    const char *notFixedWidth[] = {
+            "GB18030",
+            "UTF8",
+            "windows-949-2000",
+            "UTF16"
+    };
+
+    for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
+        /* Start each converter from a clean status: a failure left over from
+           an earlier iteration would make this ucnv_open() fail as well. */
+        status = U_ZERO_ERROR;
+        cnv = ucnv_open(fixedWidth[i], &status);
+        if (cnv == NULL || U_FAILURE(status)) {
+            log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
+            continue;
+        }
+
+        if (!ucnv_isFixedWidth(cnv, &status)) {
+            log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
+        }
+        ucnv_close(cnv);
+    }
+
+    for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
+        /* Same reset as above, so one unavailable converter cannot mask the rest. */
+        status = U_ZERO_ERROR;
+        cnv = ucnv_open(notFixedWidth[i], &status);
+        if (cnv == NULL || U_FAILURE(status)) {
+            log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
+            continue;
+        }
+
+        if (ucnv_isFixedWidth(cnv, &status)) {
+            log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
+        }
+        ucnv_close(cnv);
+    }
+}