/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2006,2008 International Business Machines Corporation and
+ * Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*******************************************************************************
#include "unicode/uloc.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
+#include "unicode/ucnv_cb.h"
#include "cintltst.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/ucol.h"
+#include "unicode/utf16.h"
#include "cmemory.h"
+#include "nucnvtst.h"
+
+#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
static void TestSBCS(void);
static void TestDBCS(void);
static void TestMBCS(void);
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
+static void TestICCRunout(void);
+#endif
#ifdef U_ENABLE_GENERIC_ISO_2022
static void TestISO_2022(void);
static void TestISO_2022_KR(void);
static void TestISO_2022_KR_1(void);
static void TestISO_2022_CN(void);
+#if 0
+ /*
+ * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
+ */
static void TestISO_2022_CN_EXT(void);
+#endif
static void TestJIS(void);
static void TestHZ(void);
#endif
static void TestLMBCS(void);
static void TestJitterbug255(void);
static void TestEBCDICUS4XML(void);
+#if 0
+ /*
+ * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
+ */
static void TestJitterbug915(void);
+#endif
static void TestISCII(void);
static void TestCoverageMBCS(void);
static void TestJitterbug2346(void);
static void TestJitterbug2411(void);
+static void TestJB5275(void);
+static void TestJB5275_1(void);
static void TestJitterbug6175(void);
+
+static void TestIsFixedWidth(void);
#endif
+static void TestInBufSizes(void);
+
static void TestRoundTrippingAllUTF(void);
static void TestConv(const uint16_t in[],
int len,
const char* lang,
char byteArr[],
int byteArrLen);
-void addTestNewConvert(TestNode** root);
/* open a converter, using test data if it begins with '@' */
static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
void addTestNewConvert(TestNode** root)
{
+#if !UCONFIG_NO_FILE_IO
addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
+#endif
addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
#if !UCONFIG_NO_LEGACY_CONVERSION
addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
+#if !UCONFIG_NO_FILE_IO
addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
+ addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
+#endif
addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
#ifdef U_ENABLE_GENERIC_ISO_2022
addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
+ /*
+ * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
+ */
addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
#endif
addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
-
+ addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
+ addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
#if !UCONFIG_NO_COLLATION
addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
#endif
#endif
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
#endif
addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
+
+ addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
#endif
}
log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
sourceLen, targ-junkout);
- if(VERBOSITY)
+ if(getTestOption(VERBOSITY_OPTION))
{
char junk[9999];
char offset_str[9999];
if(expectLen != targ-junkout) {
log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
- printf("\nGot:");
+ fprintf(stderr, "Got:\n");
printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
- printf("\nExpected:");
+ fprintf(stderr, "Expected:\n");
printSeqErr((const unsigned char*)expect, expectLen);
return TC_MISMATCH;
}
} else {
log_err("String does not match u->%s\n", gNuConvTestName);
printUSeqErr(source, sourceLen);
- printf("\nGot:");
+ fprintf(stderr, "Got:\n");
printSeqErr((const unsigned char *)junkout, expectLen);
- printf("\nExpected:");
+ fprintf(stderr, "Expected:\n");
printSeqErr((const unsigned char *)expect, expectLen);
return TC_MISMATCH;
log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
sourcelen, targ-junkout);
- if(VERBOSITY)
+ if(getTestOption(VERBOSITY_OPTION))
{
char junk[9999];
char offset_str[9999];
/** test chars #1 */
/* 1 2 3 1Han 2Han 3Han . */
static const UChar sampleText[] =
- { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
+ { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
+ static const UChar sampleTextRoundTripUnmappable[] =
+ { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
static const uint8_t expectedUTF8[] =
- { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
+ { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
static const int32_t toUTF8Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
static const int32_t fmUTF8Offs[] =
- { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
+ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
#ifdef U_ENABLE_GENERIC_ISO_2022
/* Same as UTF8, but with ^[%B preceding */
/* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
static const uint8_t expectedIBM930[] =
- { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
+ { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
static const int32_t toIBM930Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
static const int32_t fmIBM930Offs[] =
- { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
+ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
/* 1 2 3 0 h1 h2 h3 . MBCS*/
static const uint8_t expectedIBM943[] =
- { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
+ { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
static const int32_t toIBM943Offs [] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
static const int32_t fmIBM943Offs[] =
- { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
+ { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
/* 1 2 3 0 h1 h2 h3 . DBCS*/
static const uint8_t expectedIBM9027[] =
- { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
+ { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
static const int32_t toIBM9027Offs [] =
- { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
+ { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
/* 1 2 3 0 <?> <?> <?> . SBCS*/
static const uint8_t expectedIBM920[] =
- { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
+ { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
static const int32_t toIBM920Offs [] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
/* 1 2 3 0 <?> <?> <?> . SBCS*/
static const uint8_t expectedISO88593[] =
- { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
+ { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
static const int32_t toISO88593Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
/* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
static const uint8_t expectedLATIN1[] =
- { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
+ { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
static const int32_t toLATIN1Offs[] =
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
/* etc */
static const uint8_t expectedUTF16BE[] =
- { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
+ { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
static const int32_t toUTF16BEOffs[]=
- { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
+ { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
static const int32_t fmUTF16BEOffs[] =
- { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
+ { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
static const uint8_t expectedUTF16LE[] =
- { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
+ { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
static const int32_t toUTF16LEOffs[]=
- { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
+ { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
static const int32_t fmUTF16LEOffs[] =
- { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
+ { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
static const uint8_t expectedUTF32BE[] =
{ 0x00, 0x00, 0x00, 0x31,
0x00, 0x00, 0x4e, 0x00,
0x00, 0x00, 0x4e, 0x8c,
0x00, 0x00, 0x4e, 0x09,
- 0x00, 0x00, 0x00, 0x2e };
+ 0x00, 0x00, 0x00, 0x2e,
+ 0x00, 0x02, 0x00, 0x21 };
static const int32_t toUTF32BEOffs[]=
{ 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01,
0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08 };
static const int32_t fmUTF32BEOffs[] =
- { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
+ { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
static const uint8_t expectedUTF32LE[] =
{ 0x31, 0x00, 0x00, 0x00,
0x00, 0x4e, 0x00, 0x00,
0x8c, 0x4e, 0x00, 0x00,
0x09, 0x4e, 0x00, 0x00,
- 0x2e, 0x00, 0x00, 0x00 };
+ 0x2e, 0x00, 0x00, 0x00,
+ 0x21, 0x00, 0x02, 0x00 };
static const int32_t toUTF32LEOffs[]=
{ 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01,
0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08 };
static const int32_t fmUTF32LEOffs[] =
- { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
+ { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
#if !UCONFIG_NO_LEGACY_CONVERSION
/*EBCDIC_STATEFUL*/
- testConvertToU(expectedIBM930, sizeof(expectedIBM930),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
+ testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
+ sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
/*MBCS*/
- testConvertToU(expectedIBM943, sizeof(expectedIBM943),
- sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
+ testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
+ sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
#endif
/* Try it again to make sure it still works */
Hi Mom -+Jjo--!
A+ImIDkQ.
+-
- +ZeVnLIqe
+ +ZeVnLIqe-
*/
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
0x2b, 0x2d,
- 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
+ 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
};
static const UChar unicode[] = {
/*
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
11, 12, 12, 12, 13, 13, 13, 13, 14,
15, 15,
- 16, 16, 16, 17, 17, 17, 18, 18, 18
+ 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
};
/* same but escaping set O (the exclamation mark) */
Hi Mom -+Jjo--+ACE-
A+ImIDkQ.
+-
- +ZeVnLIqe
+ +ZeVnLIqe-
*/
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
0x2b, 0x2d,
- 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
+ 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
};
static const int32_t toUnicodeOffsetsR[] = {
0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
11, 12, 12, 12, 13, 13, 13, 13, 14,
15, 15,
- 16, 16, 16, 17, 17, 17, 18, 18, 18
+ 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
};
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
static void
TestAmbiguousConverter(UConverter *cnv) {
- static const char inBytes[2]={ 0x61, 0x5c };
+ static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
UChar outUnicode[20]={ 0, 0, 0, 0 };
const char *s;
UErrorCode errorCode;
UBool isAmbiguous;
- /* try to convert an 'a' and a US-ASCII backslash */
+ /* try to convert an 'a', a square bracket and a US-ASCII backslash */
errorCode=U_ZERO_ERROR;
s=inBytes;
u=outUnicode;
- ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
+ ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
if(U_FAILURE(errorCode)) {
/* we do not care about general failures in this test; the input may just not be mappable */
return;
}
- if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
- /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
+ if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
+ /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
+ /* There are some encodings that are partially ASCII based,
+ like the ISO-7 and GSM series of codepages, which we ignore. */
return;
}
isAmbiguous=ucnv_isAmbiguous(cnv);
/* check that outUnicode[2]!=0x5c is exactly the same as ucnv_isAmbiguous() */
- if((outUnicode[1]!=0x5c)!=isAmbiguous) {
+ if((outUnicode[2]!=0x5c)!=isAmbiguous) {
log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
- ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
+ ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
return;
}
- if(outUnicode[1]!=0x5c) {
+ if(outUnicode[2]!=0x5c) {
/* needs fixup, fix it */
ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
- if(outUnicode[1]!=0x5c) {
+ if(outUnicode[2]!=0x5c) {
/* the fix failed */
log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
return;
0
};
UChar asciiResult[200], sjisResult[200];
- int32_t asciiLength = 0, sjisLength = 0, i;
+ int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
const char *name;
/* enumerate all converters */
return;
}
/* convert target from Latin-1 to Unicode */
- asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
+ /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
if (U_FAILURE(status))
{
log_err("Failed to convert the Latin-1 string.\n");
- free(sjisResult);
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
return;
if (!ucnv_isAmbiguous(sjis_cnv))
{
log_err("SJIS converter should contain ambiguous character mappings.\n");
- free(sjisResult);
- free(asciiResult);
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
return;
}
}
-void
-static TestUTF7() {
+static void TestUTF7() {
/* test input */
static const uint8_t in[]={
/* H - +Jjo- - ! +- +2AHcAQ */
ucnv_close(cnv);
}
-void
-static TestIMAP() {
+static void TestIMAP() {
/* test input */
static const uint8_t in[]={
/* H - &Jjo- - ! &- &2AHcAQ- \ */
ucnv_close(cnv);
}
-void
-static TestUTF8() {
+static void TestUTF8() {
/* test input */
static const uint8_t in[]={
0x61,
ucnv_close(cnv);
}
-void
-static TestCESU8() {
+static void TestCESU8() {
/* test input */
static const uint8_t in[]={
0x61,
ucnv_close(cnv);
}
-void
-static TestUTF16() {
+static void TestUTF16() {
/* test input */
static const uint8_t in1[]={
0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
ucnv_close(cnv);
}
-void
-static TestUTF16BE() {
+static void TestUTF16BE() {
/* test input */
static const uint8_t in[]={
0x00, 0x61,
ucnv_close(cnv);
}
-void
-static TestUTF32() {
+static void TestUTF32() {
/* test input */
static const uint8_t in1[]={
0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
if(U_FAILURE(errorCode)) {
- log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
+ log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "LATIN_1");
}
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
+/*
+ * Calls ucnv_getNextUChar() three times over the three bytes a2 ae a2 with an
+ * "ibm-1363" converter. The test passes only if, after the third call, the
+ * status is U_INDEX_OUTOFBOUNDS_ERROR and the returned value is 0xFFFF.
+ *
+ * The input corresponds to this conversion test-data row:
+ * { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} }
+ * i.e. the expected text is { 0x00a1, 0x001a }.
+ *
+ * If the converter cannot be opened, this is reported through log_data_err()
+ * and the test returns without checking anything.
+ */
+static void
+TestICCRunout() {
+    const char *cnvName = "ibm-1363";
+    UErrorCode status = U_ZERO_ERROR;
+    const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
+    const char *source = sourceData;
+    const char *sourceLim = sourceData+sizeof(sourceData);
+    UChar c1, c2, c3;
+    UConverter *cnv=ucnv_open(cnvName, &status);
+    if(U_FAILURE(status)) {
+        log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
+        return;
+    }
+
+    /* source-sourceData is a pointer difference; cast it to int so that it matches %d. */
+    c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
+    log_verbose("c1: U+%04X, source@%d, status %s\n", c1, (int)(source-sourceData), u_errorName(status));
+
+    c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
+    log_verbose("c2: U+%04X, source@%d, status %s\n", c2, (int)(source-sourceData), u_errorName(status));
+
+    c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
+    log_verbose("c3: U+%04X, source@%d, status %s\n", c3, (int)(source-sourceData), u_errorName(status));
+
+    if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
+        log_verbose("OK\n");
+    } else {
+        log_err("FAIL: c3 was not FFFF or err was not U_INDEX_OUTOFBOUNDS_ERROR\n");
+    }
+
+    ucnv_close(cnv);
+}
+#endif
+
#ifdef U_ENABLE_GENERIC_ISO_2022
static void
char *cTarget;
const char *cTargetLimit;
char *cBuf;
- UChar *uBuf,*test;
+ UChar *uBuf; /*,*test;*/
int32_t uBufSize = 120;
int len=0;
int i=2;
}while(cSource<cSourceLimit);
uSource = source;
- test =uBuf;
+ /*test =uBuf;*/
for(len=0;len<(int)(source - sourceLimit);len++){
if(uBuf[len]!=uSource[len]){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
char *cTarget;
const char *cTargetLimit;
char *cBuf;
- UChar *uBuf,*test;
+ UChar *uBuf; /*,*test;*/
int32_t uBufSize = 120;
int len=0;
int i=2;
}while(cSource<cTarget);
uSource = source;
- test =uBuf;
+ /*test =uBuf;*/
for(;len<(int)(source - sourceLimit);len++){
if(uBuf[len]!=uSource[len]){
log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
static void
TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
const uint16_t results[], const char* message){
- const char* s0;
+/* const char* s0; */
const char* s=(char*)source;
const uint16_t *r=results;
UErrorCode errorCode=U_ZERO_ERROR;
uint32_t c,exC;
ucnv_reset(cnv);
while(s<limit) {
- s0=s;
+ /* s0=s; */
c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
break; /* no more significant input */
log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
break;
} else {
- if(UTF_IS_FIRST_SURROGATE(*r)){
+ if(U16_IS_LEAD(*r)){
int i =0, len = 2;
- UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
+ U16_NEXT(r, i, len, exC);
r++;
}else{
exC = *r;
0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
- 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+ 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
0x201D, 0x3014, 0x000D, 0x000A,
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
}
if(dstIndex < dstLen){
if(c>0xFFFF){
- dst[dstIndex++] = UTF16_LEAD(c);
+ dst[dstIndex++] = U16_LEAD(c);
if(dstIndex<dstLen){
- dst[dstIndex]=UTF16_TRAIL(c);
+ dst[dstIndex]=U16_TRAIL(c);
}else{
*status=U_BUFFER_OVERFLOW_ERROR;
}
usource[0] =(UChar) i;
len=1;
}else{
- usource[0]=UTF16_LEAD(i);
- usource[1]=UTF16_TRAIL(i);
+ usource[0]=U16_LEAD(i);
+ usource[1]=U16_TRAIL(i);
len=2;
}
ulen=len;
static void
TestRoundTrippingAllUTF(void){
- if(!QUICK){
+ if(!getTestOption(QUICK_OPTION)){
log_verbose("Running exhaustive round trip test for BOCU-1\n");
TestFullRoundtrip("BOCU-1");
log_verbose("Running exhaustive round trip test for SCSU\n");
TestFullRoundtrip("UTF-7,version=1");
log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
TestFullRoundtrip("IMAP-mailbox-name");
- log_verbose("Running exhaustive round trip test for GB18030\n");
- TestFullRoundtrip("GB18030");
+ /*
+ *
+ * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
+ * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
+ * The old mappings remain as fallbacks.
+ * This test may be reintroduced at a later time.
+ *
+ * 110118 - mow
+ */
+ /*
+ log_verbose("Running exhaustive round trip test for GB18030\n");
+ TestFullRoundtrip("GB18030");
+ */
}
}
0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
- 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+ 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
0x201D, 0x000D, 0x000A,
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
}
+
+#if 0
+ ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
+
static void TestJitterbug915(){
/* tests for roundtripping of the below sequence
\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
free(cBuf);
free(offsets);
}
+#endif
static void
TestISO_2022_CN() {
/* Callback for TestJitterbug6175, should only get called for empty segment errors */
static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
- if (reason > UCNV_IRREGULAR)
+ if (reason > UCNV_IRREGULAR) {
return;
- if (reason != UCNV_IRREGULAR)
+ }
+ if (reason != UCNV_IRREGULAR) {
log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
+ }
/* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
*err = U_ZERO_ERROR;
ucnv_cbToUWriteSub(toArgs,0,err);
errorCode=U_ZERO_ERROR;
/* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
- ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
+ pUIn++;
+ ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
}
+ pUIn--;
+
errorCode=U_ZERO_ERROR;
ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
const char *testBuffer = (const char *)testBytes;
const char *testEnd = (const char *)testBytes + sizeof(testBytes);
UErrorCode status = U_ZERO_ERROR;
- UChar32 result;
+ /*UChar32 result;*/
UConverter *cnv = 0;
cnv = ucnv_open("shift-jis", &status);
}
while (testBuffer != testEnd)
{
- result = ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
+ /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
if (U_FAILURE(status))
{
log_err("Failed to convert the next UChar for SJIS.\n");
int numNeeded=0;
utf8cnv = ucnv_open ("utf8", &status);
if(U_FAILURE(status)){
- log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
+ log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
return;
}
myCollator = ucol_open("zh", &status);
if(U_FAILURE(status)){
- log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
+ log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
+ ucnv_close(utf8cnv);
return;
}
status = U_ZERO_ERROR;
if(target_cap >= buff_size) {
log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
- return;
+ break;
}
bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
rules, rules_length, &status);
target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
if(numNeeded!=0 && numNeeded!= bytes_needed){
log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
+ break;
}
numNeeded = bytes_needed;
} while (status == U_BUFFER_OVERFLOW_ERROR);
}
ucnv_close(conv);
}
+/*
+ * Converts a fixed ISCII byte stream to Unicode with the "iscii-gur" converter
+ * and compares the result, unit by unit, with a fixed UTF-16 sequence.
+ * As labelled in the data below, the stream switches script with an EF xx
+ * sequence and expects the following line (after a new line) to be converted
+ * with the default script again.
+ *
+ * If the converter cannot be opened, this is reported through log_data_err()
+ * and the test returns. A failed conversion is logged and whatever was
+ * produced is still compared.
+ */
+static void TestJB5275_1(){
+
+    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
+                                "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
+                                /* Switch script: */
+                                "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
+                                "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
+                                "\xEF\x40\x3B\xB3\x0A";
+    static const UChar expected[] ={
+            0x003b, 0x0a15, 0x000a, /* Easy characters */
+            0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
+            0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
+            0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
+            0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
+    };
+    const int32_t expectedLength = (int32_t)LENGTHOF(expected);
+
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter* conv = ucnv_open("iscii-gur", &status);
+    UChar dest[100] = {'\0'};
+    UChar* target = dest;
+    UChar* targetLimit = dest+100;
+    const char* source = data;
+    const char* sourceLimit = data+strlen(data);
+    int32_t outLength;
+    int32_t compareLength;
+    int32_t i;
+
+    if (U_FAILURE(status)) {
+        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
+        return;
+    }
+
+    log_verbose("Testing switching back to default script when new line is encountered.\n");
+    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
+    if(U_FAILURE(status)){
+        log_err("conversion failed: %s \n", u_errorName(status));
+    }
+    outLength = (int32_t)(target - dest);
+    printUSeq(dest, outLength);
+
+    /*
+     * Compare lengths first: extra output has no expected counterpart (and must
+     * not be used to index past expected[]), and missing output must not pass
+     * silently.
+     */
+    if(outLength != expectedLength){
+        log_err("expected %d UChars but got %d\n", (int)expectedLength, (int)outLength);
+    }
+    compareLength = (outLength < expectedLength) ? outLength : expectedLength;
+    for(i = 0; i < compareLength; i++){
+        if(expected[i] != dest[i]){
+            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", expected[i], dest[i]);
+        }
+    }
+    ucnv_close(conv);
+}
+/*
+ * Converts a fixed ISCII byte stream to Unicode with the "iscii" converter and
+ * compares the result, unit by unit, with a fixed UTF-16 sequence. Each line
+ * of the input starts with an EF xx sequence; the per-line comments name the
+ * script each line is meant to exercise.
+ *
+ * If the converter cannot be opened, this is reported through log_data_err()
+ * and the test returns. A failed conversion is logged and whatever was
+ * produced is still compared.
+ */
+static void TestJB5275(){
+    static const char* data =
+    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
+    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
+    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
+        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
+        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
+        "\xEF\x48\x38\xB3\x0A" /* Kannada test */
+        "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
+        "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
+        "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
+    /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
+    static const UChar expected[] ={
+        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
+        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
+        0x0038, 0x0C95, 0x000A, /* Kannada test */
+        0x0039, 0x0D15, 0x000A, /* Malayalam test */
+        0x003A, 0x0A95, 0x000A, /* Gujarati test */
+        0x003B, 0x0A15, 0x000A, /* Punjabi test */
+    };
+    const int32_t expectedLength = (int32_t)LENGTHOF(expected);
+
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter* conv = ucnv_open("iscii", &status);
+    UChar dest[100] = {'\0'};
+    UChar* target = dest;
+    UChar* targetLimit = dest+100;
+    const char* source = data;
+    const char* sourceLimit = data+strlen(data);
+    int32_t outLength;
+    int32_t compareLength;
+    int32_t i;
+
+    /* Report a converter that cannot be opened as a data problem, as TestJB5275_1 does. */
+    if (U_FAILURE(status)) {
+        log_data_err("Unable to open converter: iscii got errorCode: %s\n", u_errorName(status));
+        return;
+    }
+
+    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
+    if(U_FAILURE(status)){
+        log_err("conversion failed: %s \n", u_errorName(status));
+    }
+    outLength = (int32_t)(target - dest);
+
+    printUSeq(dest, outLength);
+
+    /*
+     * Compare lengths first: extra output has no expected counterpart (and must
+     * not be used to index past expected[]), and missing output must not pass
+     * silently.
+     */
+    if(outLength != expectedLength){
+        log_err("expected %d UChars but got %d\n", (int)expectedLength, (int)outLength);
+    }
+    compareLength = (outLength < expectedLength) ? outLength : expectedLength;
+    for(i = 0; i < compareLength; i++){
+        if(expected[i] != dest[i]){
+            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", expected[i], dest[i]);
+        }
+    }
+    ucnv_close(conv);
+}
+
+/*
+ * Checks ucnv_isFixedWidth() against two lists of converter names: every
+ * converter in fixedWidth[] must report TRUE and every converter in
+ * notFixedWidth[] must report FALSE. A converter that cannot be opened is
+ * reported through log_data_err() and skipped.
+ */
+static void
+TestIsFixedWidth() {
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter *cnv = NULL;
+    int32_t i;
+
+    const char *fixedWidth[] = {
+            "US-ASCII",
+            "UTF32",
+            "ibm-5478_P100-1995"
+    };
+
+    const char *notFixedWidth[] = {
+            "GB18030",
+            "UTF8",
+            "windows-949-2000",
+            "UTF16"
+    };
+
+    for (i = 0; i < (int32_t)LENGTHOF(fixedWidth); i++) {
+        /*
+         * Start every converter with a clean status. ICU functions return
+         * immediately when handed a failure code, so one converter that fails
+         * to open would otherwise make all later ones fail as well.
+         */
+        status = U_ZERO_ERROR;
+        cnv = ucnv_open(fixedWidth[i], &status);
+        if (cnv == NULL || U_FAILURE(status)) {
+            log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
+            continue;
+        }
+
+        if (!ucnv_isFixedWidth(cnv, &status)) {
+            log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
+        }
+        ucnv_close(cnv);
+    }
+
+    for (i = 0; i < (int32_t)LENGTHOF(notFixedWidth); i++) {
+        /* Same reset as above: do not let an earlier failure leak into this converter. */
+        status = U_ZERO_ERROR;
+        cnv = ucnv_open(notFixedWidth[i], &status);
+        if (cnv == NULL || U_FAILURE(status)) {
+            log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
+            continue;
+        }
+
+        if (ucnv_isFixedWidth(cnv, &status)) {
+            log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
+        }
+        ucnv_close(cnv);
+    }
+}