+
+#define ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
+ errcheckln(status, "error at file %s, line %d, status = %s", __FILE__, __LINE__, \
+ u_errorName(status)); \
+ return;}}
+
+
+/*
+ * Write the contents of s to the stream out, one code point at a time.
+ *
+ * Each code point read with char32At() is handed to U8_APPEND, which fills a
+ * small local byte buffer and advances byteCount; exactly byteCount bytes are
+ * then passed to fwrite().  The error flag set by U8_APPEND and the return
+ * value of fwrite() are not examined.
+ */
+static void writeStringInU8(FILE *out, const UnicodeString &s) {
+    int32_t pos = 0;
+    while (pos < s.length()) {
+        const UChar32 codePoint = s.char32At(pos);
+
+        // Scratch buffer for the encoded form of a single code point.
+        uint8_t utf8Bytes[10];
+        int32_t byteCount = 0;
+        UBool appendFailed = FALSE;
+        U8_APPEND(utf8Bytes, byteCount, (int32_t)sizeof(utf8Bytes), codePoint, appendFailed);
+
+        fwrite(utf8Bytes, 1, byteCount, out);
+
+        // Step to the start of the next code point (may skip two code units).
+        pos = s.moveIndex32(pos, 1);
+    }
+}
+
+
+
+
+void TransliteratorRoundTripTest::TestHan() {
+ UErrorCode status = U_ZERO_ERROR;
+ LocalULocaleDataPointer uld(ulocdata_open("zh",&status));
+ LocalUSetPointer USetExemplars(ulocdata_getExemplarSet(uld.getAlias(), uset_openEmpty(), 0, ULOCDATA_ES_STANDARD, &status));
+ ASSERT_SUCCESS(status);
+
+ UnicodeString source;
+ UChar32 c;
+ int i;
+ for (i=0; ;i++) {
+ // Add all of the Chinese exemplar chars to the string "source".
+ c = uset_charAt(USetExemplars.getAlias(), i);
+ if (c == (UChar32)-1) {
+ break;
+ }
+ source.append(c);
+ }
+
+ // transform with Han translit
+ Transliterator *hanTL = Transliterator::createInstance("Han-Latin", UTRANS_FORWARD, status);
+ ASSERT_SUCCESS(status);
+ UnicodeString target=source;
+ hanTL->transliterate(target);
+ // now verify that there are no Han characters left
+ UnicodeSet allHan("[:han:]", status);
+ ASSERT_SUCCESS(status);
+ if (allHan.containsSome(target)) {
+ errln("file %s, line %d, No Han must be left after Han-Latin transliteration",
+ __FILE__, __LINE__);
+ }
+
+ // check the pinyin translit
+ Transliterator *pn = Transliterator::createInstance("Latin-NumericPinyin", UTRANS_FORWARD, status);
+ ASSERT_SUCCESS(status);
+ UnicodeString target2 = target;
+ pn->transliterate(target2);
+
+ // verify that there are no marks
+ Transliterator *nfd = Transliterator::createInstance("nfd", UTRANS_FORWARD, status);
+ ASSERT_SUCCESS(status);
+
+ UnicodeString nfded = target2;
+ nfd->transliterate(nfded);
+ UnicodeSet allMarks(UNICODE_STRING_SIMPLE("[\\u0304\\u0301\\u030C\\u0300\\u0306]"), status); // look only for Pinyin tone marks, not all marks (there are some others in there)
+ ASSERT_SUCCESS(status);
+ assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfded));
+
+ // verify roundtrip
+ Transliterator *np = pn->createInverse(status);
+ ASSERT_SUCCESS(status);
+ UnicodeString target3 = target2;
+ np->transliterate(target3);
+ UBool roundtripOK = (target3.compare(target) == 0);
+ assertTrue("NumericPinyin must roundtrip", roundtripOK);
+ if (!roundtripOK) {
+ const char *filename = "numeric-pinyin.log.txt";
+ FILE *out = fopen(filename, "w");
+ errln("Creating log file %s\n", filename);
+ fprintf(out, "Pinyin: ");
+ writeStringInU8(out, target);
+ fprintf(out, "\nPinyin-Numeric-Pinyin: ");
+ writeStringInU8(out, target2);
+ fprintf(out, "\nNumeric-Pinyin-Pinyin: ");
+ writeStringInU8(out, target3);
+ fprintf(out, "\n");
+ fclose(out);