+/*
+ * Sets the default converter name to `name` and verifies the outcome twice:
+ * once through ucnv_getDefaultName(), and once through the name reported by
+ * a converter opened with a NULL name. Both must compare equal to `expected`;
+ * any mismatch or open failure is reported with log_err().
+ */
+static void TestOneDefaultNameChange(const char *name, const char *expected) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UConverter *defaultCnv;
+
+    ucnv_setDefaultName(name);
+    if(strcmp(ucnv_getDefaultName(), expected)!=0) {
+        log_err("setDefaultName of %s failed\n", name);
+    } else {
+        log_verbose("setDefaultName of %s works.\n", name);
+    }
+
+    /* Open with a NULL name and compare what that converter calls itself. */
+    defaultCnv=ucnv_open(NULL, &errorCode);
+    if (defaultCnv == NULL || U_FAILURE(errorCode)) {
+        log_err("opening the default converter of %s failed\n", name);
+        return;
+    }
+
+    if(strcmp(ucnv_getName(defaultCnv, &errorCode), expected)!=0) {
+        log_err("ucnv_getName of %s failed\n", name);
+    } else {
+        log_verbose("ucnv_getName of %s works.\n", name);
+    }
+    ucnv_close(defaultCnv);
+}
+
+/*
+ * Tests ucnv_getDefaultName() and ucnv_setDefaultName().
+ *
+ * The default name in effect on entry is saved, a series of names is set and
+ * verified through TestOneDefaultNameChange(), and the saved name is put back
+ * before returning. Which result is expected for a given name depends on
+ * U_CHARSET_IS_UTF8 and on the UCONFIG_* conversion switches below.
+ */
+static void TestDefaultName(void) {
+    /* Copy of the default name on entry; restored at the end of the test. */
+    static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
+    const char *originalName = ucnv_getDefaultName();
+
+    /*
+     * Validate before copying: never hand NULL to strcpy() and never write
+     * past the end of defaultName, whatever the library returned.
+     */
+    if(originalName == NULL || strlen(originalName) >= sizeof(defaultName)) {
+        log_err("getDefaultName returned a NULL or overlong name\n");
+        return;
+    }
+    strcpy(defaultName, originalName);
+
+    log_verbose("getDefaultName returned %s\n", defaultName);
+
+    /* change the default name by setting it */
+    TestOneDefaultNameChange("UTF-8", "UTF-8");
+#if U_CHARSET_IS_UTF8
+    /* In this configuration every requested name is expected to yield UTF-8. */
+    TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
+    TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
+    TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
+#else
+# if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+    TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
+    TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
+# endif
+    TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
+#endif
+
+    /* set the default name back */
+    ucnv_setDefaultName(defaultName);
+}
+
+/* Test that ucnv_compareNames() matches names according to spec. ----------- */
+
+/* Returns -1, 0 or 1 when n is negative, zero or positive respectively. */
+static int
+sign(int n) {
+    /* Each comparison yields 0 or 1, so the difference is the sign of n. */
+    return (n>0)-(n<0);
+}
+
+/*
+ * Checks a NULL-terminated list of converter names against one relation.
+ *
+ * names[0] is the relation string; its first character selects the expected
+ * sign of ucnv_compareNames(): '=' for 0, '<' for -1, anything else for +1.
+ * The remaining entries are names. Each adjacent pair (names[k], names[k+1])
+ * is compared, and a result with the wrong sign is reported via log_err().
+ */
+static void
+compareNames(const char **names) {
+    const char *previous, *current;
+    int expectedSign, cmp;
+
+    switch(**names) {
+    case '=':
+        expectedSign = 0;
+        break;
+    case '<':
+        expectedSign = -1;
+        break;
+    default:
+        expectedSign = 1;
+        break;
+    }
+    ++names;
+
+    previous=*names++;
+    if(previous==NULL) {
+        /* The list holds only the relation; there is nothing to compare. */
+        return;
+    }
+
+    for(; (current=*names)!=NULL; ++names) {
+        cmp=ucnv_compareNames(previous, current);
+        if(sign(cmp)!=expectedSign) {
+            log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", previous, current, cmp, expectedSign);
+        }
+        previous=current;
+    }
+}
+
+/*
+ * Runs compareNames() over fixed name lists. Each list starts with its
+ * relation ("=" or "<") and ends with NULL.
+ */
+static void
+TestCompareNames() {
+    static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
+    static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL };
+    static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
+    static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };
+
+    /* Lists are checked in this order. */
+    static const char **const nameLists[]={ equalUTF8, equalIBM, lessMac, lessUTF080 };
+    int32_t listIndex;
+
+    for(listIndex=0; listIndex<UPRV_LENGTHOF(nameLists); ++listIndex) {
+        compareNames(nameLists[listIndex]);
+    }
+}
+
+/*
+ * Tests substitution behavior of converters:
+ *  - converting the single unit U+D900 with UTF-16 and UTF-32 must yield 4
+ *    and 8 bytes respectively, starting with a detectable Unicode signature;
+ *  - ucnv_setSubstString() followed by ucnv_getSubstChars() on ISO-8859-1
+ *    must return the string as charset bytes;
+ *  - the same sequence on HZ (when legacy conversion is compiled in) must
+ *    return an empty byte string.
+ * The test returns early when a converter cannot be opened.
+ */
+static void
+TestSubstString() {
+    static const UChar surrogate[1]={ 0xd900 };
+    static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
+    static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
+
+    char out[16];
+    UConverter *conv;
+    UErrorCode status;
+    int32_t outLength;
+    int8_t subLength;
+    UBool bomWritten;
+
+    /* UTF-16/32: test that the BOM is output before the sub character */
+    status=U_ZERO_ERROR;
+    conv=ucnv_open("UTF-16", &status);
+    if(U_FAILURE(status)) {
+        log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(status));
+        return;
+    }
+    outLength=ucnv_fromUChars(conv, out, (int32_t)sizeof(out), surrogate, 1, &status);
+    ucnv_close(conv);
+    /* Signature detection only runs when conversion succeeded with 4 bytes. */
+    bomWritten=
+        U_SUCCESS(status) &&
+        outLength==4 &&
+        ucnv_detectUnicodeSignature(out, outLength, NULL, &status)!=NULL;
+    if(!bomWritten) {
+        log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
+    }
+
+    status=U_ZERO_ERROR;
+    conv=ucnv_open("UTF-32", &status);
+    if(U_FAILURE(status)) {
+        log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(status));
+        return;
+    }
+    outLength=ucnv_fromUChars(conv, out, (int32_t)sizeof(out), surrogate, 1, &status);
+    ucnv_close(conv);
+    /* Signature detection only runs when conversion succeeded with 8 bytes. */
+    bomWritten=
+        U_SUCCESS(status) &&
+        outLength==8 &&
+        ucnv_detectUnicodeSignature(out, outLength, NULL, &status)!=NULL;
+    if(!bomWritten) {
+        log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
+    }
+
+    /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
+    status=U_ZERO_ERROR;
+    conv=ucnv_open("ISO-8859-1", &status);
+    if(U_FAILURE(status)) {
+        log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(status));
+        return;
+    }
+    ucnv_setSubstString(conv, sub, UPRV_LENGTHOF(sub), &status);
+    if(U_SUCCESS(status)) {
+        /* On input subLength is the capacity of out; on output the byte count. */
+        subLength = sizeof(out);
+        ucnv_getSubstChars(conv, out, &subLength, &status);
+        /* Stateless converter, we expect the string converted to charset bytes. */
+        if(U_FAILURE(status) || subLength!=sizeof(subChars) || 0!=uprv_memcmp(out, subChars, subLength)) {
+            log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(status));
+        }
+    } else {
+        log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(status));
+    }
+    ucnv_close(conv);
+
+#if !UCONFIG_NO_LEGACY_CONVERSION
+    status=U_ZERO_ERROR;
+    conv=ucnv_open("HZ", &status);
+    if(U_FAILURE(status)) {
+        log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(status));
+        return;
+    }
+    ucnv_setSubstString(conv, sub, UPRV_LENGTHOF(sub), &status);
+    if(U_SUCCESS(status)) {
+        subLength = sizeof(out);
+        ucnv_getSubstChars(conv, out, &subLength, &status);
+        /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
+        if(U_FAILURE(status) || subLength!=0) {
+            log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(status));
+        }
+    } else {
+        log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(status));
+    }
+    ucnv_close(conv);
+#endif
+    /*
+     * Further testing of ucnv_setSubstString() is done via intltest convert.
+     * We do not test edge cases of illegal arguments and similar because the
+     * function implementation uses all of its parameters in calls to other
+     * functions with UErrorCode parameters.
+     */
+}
+
+/*
+ * Verifies that ucnv_fromUnicode() and ucnv_toUnicode() reject malformed
+ * UChar buffer ranges with U_ILLEGAL_ARGUMENT_ERROR, and that none of the
+ * rejected calls wrote into the buffers.
+ *
+ * Two UChar pointers are derived from one 2-byte char array: one at its
+ * start and one a single byte later. Used as start/limit they describe half
+ * a UChar; used as limit/start they describe a limit before the start.
+ */
+static void
+InvalidArguments() {
+    UConverter *utf8Cnv;
+    UErrorCode status = U_ZERO_ERROR;
+    char bytes[2] = {1, 1};
+    char ucharBytes[2] = {2, 2};
+    char *bytePtr = bytes;
+    UChar *ucharStart = (UChar *)ucharBytes;
+    UChar *ucharOdd = (UChar *)(ucharBytes + 1);
+
+    utf8Cnv=ucnv_open("UTF-8", &status);
+    if(U_FAILURE(status)) {
+        log_err("ucnv_open() failed - %s\n", u_errorName(status));
+        return;
+    }
+
+    /* This one should fail because an incomplete UChar is being passed in */
+    status=U_ZERO_ERROR;
+    ucnv_fromUnicode(utf8Cnv, &bytePtr, bytePtr, (const UChar **)&ucharStart, ucharOdd, NULL, TRUE, &status);
+    if(status != U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(status));
+    }
+
+    /* This one should fail because the source pointer is past the source limit */
+    status=U_ZERO_ERROR;
+    ucnv_fromUnicode(utf8Cnv, &bytePtr, bytePtr, (const UChar **)&ucharOdd, ucharStart, NULL, TRUE, &status);
+    if(status != U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(status));
+    }
+
+    /* This one should fail because an incomplete UChar is being passed in */
+    status=U_ZERO_ERROR;
+    ucnv_toUnicode(utf8Cnv, &ucharStart, ucharOdd, (const char **)&bytePtr, bytePtr, NULL, TRUE, &status);
+    if(status != U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(status));
+    }
+
+    /* This one should fail because the target pointer is past the target limit */
+    status=U_ZERO_ERROR;
+    ucnv_toUnicode(utf8Cnv, &ucharOdd, ucharStart, (const char **)&bytePtr, bytePtr, NULL, TRUE, &status);
+    if(status != U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(status));
+    }
+
+    /* Both buffers must still hold the fill values they were initialized with. */
+    if (!(bytes[0] == 1 && bytes[1] == 1
+        && ucharBytes[0] == 2 && ucharBytes[1] == 2))
+    {
+        log_err("Data was incorrectly written to buffers\n");
+    }
+
+    ucnv_close(utf8Cnv);
+}
+
+/*
+ * Verifies that ucnv_getName() reports the expected name for converters
+ * opened through alias names.
+ *
+ * `names` holds (alias, expected name) pairs: names[i] is passed to
+ * ucnv_open() and names[i+1] is the string ucnv_getName() must return.
+ * A converter that cannot be opened is reported through log_err_status()
+ * instead of being skipped without notice.
+ */
+static void TestGetName() {
+    static const char *const names[] = {
+        "Unicode",                  "UTF-16",
+        "UnicodeBigUnmarked",       "UTF-16BE",
+        "UnicodeBig",               "UTF-16BE,version=1",
+        "UnicodeLittleUnmarked",    "UTF-16LE",
+        "UnicodeLittle",            "UTF-16LE,version=1",
+        "x-UTF-16LE-BOM",           "UTF-16LE,version=1"
+    };
+    int32_t i;
+    for(i = 0; i < UPRV_LENGTHOF(names); i += 2) {
+        UErrorCode errorCode = U_ZERO_ERROR;
+        UConverter *cnv = ucnv_open(names[i], &errorCode);
+        const char *name;
+
+        if(U_FAILURE(errorCode)) {
+            /* Make an unopenable alias visible rather than passing silently. */
+            log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
+            continue;
+        }
+
+        name = ucnv_getName(cnv, &errorCode);
+        if(U_FAILURE(errorCode) || name == NULL || 0 != strcmp(name, names[i+1])) {
+            /* Never pass a NULL pointer to a %s conversion. */
+            log_err("ucnv_getName(%s) = %s != %s -- %s\n",
+                    names[i], name != NULL ? name : "(null)", names[i+1], u_errorName(errorCode));
+        }
+        ucnv_close(cnv);
+    }
+}
+
+/*
+ * Checks the exact bytes produced when converting U+0061 with several
+ * UTF-16 converter names.
+ *
+ * expected[i] belongs to names[i]: its first element is the expected output
+ * length and the remaining elements are the expected bytes. The first two
+ * rows depend on U_IS_BIG_ENDIAN.
+ */
+static void TestUTFBOM() {
+    static const UChar a16[] = { 0x61 };
+    static const char *const names[] = {
+        "UTF-16",
+        "UTF-16,version=1",
+        "UTF-16BE",
+        "UnicodeBig",
+        "UTF-16LE",
+        "UnicodeLittle"
+    };
+    static const uint8_t expected[][5] = {
+#if U_IS_BIG_ENDIAN
+        { 4, 0xfe, 0xff, 0, 0x61 },
+        { 4, 0xfe, 0xff, 0, 0x61 },
+#else
+        { 4, 0xff, 0xfe, 0x61, 0 },
+        { 4, 0xff, 0xfe, 0x61, 0 },
+#endif
+
+        { 2, 0, 0x61 },
+        { 4, 0xfe, 0xff, 0, 0x61 },
+
+        { 2, 0x61, 0 },
+        { 4, 0xff, 0xfe, 0x61, 0 }
+    };
+
+    char bytes[10];
+    int32_t i;
+
+    for(i = 0; i < UPRV_LENGTHOF(names); ++i) {
+        const uint8_t *row = expected[i];   /* row[0] = length, row+1 = bytes */
+        UErrorCode status = U_ZERO_ERROR;
+        int32_t written = 0;
+        UConverter *conv = ucnv_open(names[i], &status);
+
+        if (U_SUCCESS(status)) {
+            written = ucnv_fromUChars(conv, bytes, (int32_t)sizeof(bytes), a16, 1, &status);
+
+            /* The bytes are compared only after the length matched row[0]. */
+            if(U_FAILURE(status) || written != row[0] || 0 != memcmp(bytes, row+1, written)) {
+                log_err("unexpected %s BOM writing behavior -- %s\n",
+                        names[i], u_errorName(status));
+            }
+            ucnv_close(conv);
+        } else {
+            log_err_status(status, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(status));
+        }
+    }
+}