/*
*******************************************************************************
- * Copyright (C) 1997-2006, International Business Machines
+ * Copyright (C) 1997-2016 International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* Date Name Description
#include <string.h>
#include "unicode/utrans.h"
#include "unicode/ustring.h"
+#include "unicode/uset.h"
#include "cintltst.h"
+#include "cmemory.h"
#define TEST(x) addTest(root, &x, "utrans/" # x)
static void TestRegisterUnregister(void);
static void TestExtractBetween(void);
static void TestUnicodeIDs(void);
+static void TestGetRulesAndSourceSet(void);
+static void TestDataVariantsCompounds(void);
static void _expectRules(const char*, const char*, const char*);
static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
TEST(TestRegisterUnregister);
TEST(TestExtractBetween);
TEST(TestUnicodeIDs);
+ TEST(TestGetRulesAndSourceSet);
+ TEST(TestDataVariantsCompounds);
}
/*------------------------------------------------------------------
"Hex-Any"
};
- for(i=0; i<sizeof(TransID)/sizeof(TransID[0]); i=i+2){
+ for(i=0; i<UPRV_LENGTHOF(TransID); i=i+2){
status = U_ZERO_ERROR;
t1=utrans_open(TransID[i], UTRANS_FORWARD,NULL,0,NULL, &status);
if(t1 == NULL || U_FAILURE(status)){
- log_err("FAIL: in instantiation for id=%s\n", TransID[i]);
+ log_data_err("FAIL: in instantiation for id=%s -> %s (Are you missing data?)\n", TransID[i], u_errorName(status));
continue;
}
inverse1=utrans_openInverse(t1, &status);
t1=utrans_open("Latin-Devanagari", UTRANS_FORWARD, NULL,0,NULL,&status);
if(U_FAILURE(status)){
- log_err("FAIL: construction\n");
+ log_data_err("FAIL: construction -> %s (Are you missing data?)\n", u_errorName(status));
return;
}
t2=utrans_open("Latin-Greek", UTRANS_FORWARD, NULL,0,NULL,&status);
"$lu > '*';"
"a > ERROR",
"abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
+
+ /* Test multiple passes
+ */
+ _expectRules("abc > xy;"
+ "::Null;"
+ "aba > z;",
+ "abc ababc aba", "xy abxy z");
}
static void TestFilter() {
"abcde",
"\\u0061\\u0062\\u0063\\u0064\\u0065"
};
- int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
+ int32_t DATA_length = UPRV_LENGTHOF(DATA);
int32_t i;
UTransliterator* hex = utrans_open("Any-Hex", UTRANS_FORWARD, NULL,0,NULL,&status);
}
}
+/**
+ * Test utrans_toRules, utrans_getSourceSet
+ */
+
+/* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
+ filter & source set 4-20 chars */
+static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
+static const char* transSimpleCName = "yo-yo_BJ";
+
+enum { kUBufMax = 256 };
+static void TestGetRulesAndSourceSet() {
+ UErrorCode status = U_ZERO_ERROR;
+ UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
+ if ( U_SUCCESS(status) ) {
+ USet* uset;
+ UChar ubuf[kUBufMax];
+ int32_t ulen;
+
+ status = U_ZERO_ERROR;
+ ulen = utrans_toRules(utrans, FALSE, ubuf, kUBufMax, &status);
+ if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
+ log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
+ u_errorName(status), ulen);
+ }
+
+ status = U_ZERO_ERROR;
+ ulen = utrans_toRules(utrans, FALSE, NULL, 0, &status);
+ if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
+ log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
+ u_errorName(status), ulen);
+ }
+
+ status = U_ZERO_ERROR;
+ ulen = utrans_toRules(utrans, TRUE, ubuf, kUBufMax, &status);
+ if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
+ log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
+ u_errorName(status), ulen);
+ }
+
+ status = U_ZERO_ERROR;
+ uset = utrans_getSourceSet(utrans, FALSE, NULL, &status);
+ ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
+ uset_close(uset);
+ if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
+ log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
+ u_errorName(status), ulen);
+ }
+
+ status = U_ZERO_ERROR;
+ uset = utrans_getSourceSet(utrans, TRUE, NULL, &status);
+ ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
+ uset_close(uset);
+ if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
+ log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
+ u_errorName(status), ulen);
+ }
+
+ utrans_close(utrans);
+ } else {
+ log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
+ transSimpleCName, u_errorName(status));
+ }
+}
+
+typedef struct {
+ const char * transID;
+ const char * sourceText;
+ const char * targetText;
+} TransIDSourceTarg;
+
+static const TransIDSourceTarg dataVarCompItems[] = {
+ { "Simplified-Traditional",
+ "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
+ "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
+ { "Halfwidth-Fullwidth",
+ "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
+ "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
+ { "Han-Latin/Names; Latin-Bopomofo",
+ "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
+ "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA\\u3001 \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA\\u3001 \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB\\u3001 \\u3129\\u311D\\u02CB \\u3108\\u311F" },
+ { "Greek-Latin",
+ "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+ "A \\u0100I H\\u0100I RH" },
+ { "Greek-Latin/BGN",
+ "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+ "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
+ { "Greek-Latin/UNGEGN",
+ "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+ "A A A R" },
+ { NULL, NULL, NULL }
+};
+
+enum { kBBufMax = 384 };
+static void TestDataVariantsCompounds() {
+ const TransIDSourceTarg* itemsPtr;
+ for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
+ UErrorCode status = U_ZERO_ERROR;
+ UChar utrid[kUBufMax];
+ int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax);
+ UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
+ if (U_FAILURE(status)) {
+ log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status));
+ continue;
+ }
+ UChar text[kUBufMax];
+ int32_t textLen = u_unescape(itemsPtr->sourceText, text, kUBufMax);
+ int32_t textLim = textLen;
+ utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
+ if (U_FAILURE(status)) {
+ log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
+ } else {
+ UChar expect[kUBufMax];
+ int32_t expectLen = u_unescape(itemsPtr->targetText, expect, kUBufMax);
+ if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
+ char btext[kBBufMax], bexpect[kBBufMax];
+ u_austrncpy(btext, text, textLen);
+ u_austrncpy(bexpect, expect, expectLen);
+ log_err("FAIL: utrans_transUChars(%s),\n expect %s\n get %s\n", itemsPtr->transID, bexpect, btext);
+ }
+ }
+ utrans_close(utrans);
+ }
+}
+
static void _expectRules(const char* crules,
const char* cfrom,
const char* cto) {
&parseErr, &status);
if (U_FAILURE(status)) {
utrans_close(trans);
- log_err("FAIL: utrans_openRules(%s) failed, error=%s\n",
+ log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
crules, u_errorName(status));
return;
}