+/*
+ * Checks unorm2_getDecomposition() on the instance returned by
+ * unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS), which the
+ * log messages refer to as "fcc".
+ *
+ * Expectations encoded below:
+ * - U+0020: no failure, and a negative return value.
+ * - U+00E4: <0061 0308>, followed by a terminating 0.
+ * - U+AC01: <1100 1161 11A8>, followed by a terminating 0.
+ * - NULL buffer with capacity 0: U_BUFFER_OVERFLOW_ERROR, length still 3.
+ * - capacity -1, or NULL buffer with capacity 4: U_ILLEGAL_ARGUMENT_ERROR.
+ */
+static void
+TestGetDecomposition() {
+    UChar buffer[32];
+    int32_t length;
+    UErrorCode status=U_ZERO_ERROR;
+    const UNormalizer2 *fcc=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &status);
+
+    if(U_FAILURE(status)) {
+        log_err_status(status, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(status));
+        return;
+    }
+
+    /* The instance lookup did not fail, so status is reused as-is for the first call. */
+    length=unorm2_getDecomposition(fcc, 0x20, buffer, UPRV_LENGTHOF(buffer), &status);
+    if(!(U_SUCCESS(status) && length<0)) {
+        log_err("unorm2_getDecomposition(fcc, space) failed\n");
+    }
+
+    /* U+00E4 */
+    status=U_ZERO_ERROR;
+    length=unorm2_getDecomposition(fcc, 0xe4, buffer, UPRV_LENGTHOF(buffer), &status);
+    if(!(U_SUCCESS(status) && length==2 &&
+            buffer[0]==0x61 && buffer[1]==0x308 && buffer[2]==0)) {
+        log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
+    }
+
+    /* U+AC01 */
+    status=U_ZERO_ERROR;
+    length=unorm2_getDecomposition(fcc, 0xac01, buffer, UPRV_LENGTHOF(buffer), &status);
+    if(!(U_SUCCESS(status) && length==3 &&
+            buffer[0]==0x1100 && buffer[1]==0x1161 && buffer[2]==0x11a8 && buffer[3]==0)) {
+        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
+    }
+
+    /* Preflighting: no buffer, zero capacity. */
+    status=U_ZERO_ERROR;
+    length=unorm2_getDecomposition(fcc, 0xac01, NULL, 0, &status);
+    if(!(status==U_BUFFER_OVERFLOW_ERROR && length==3)) {
+        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
+    }
+
+    /* Negative capacity: only the error code is checked. */
+    status=U_ZERO_ERROR;
+    length=unorm2_getDecomposition(fcc, 0xac01, buffer, -1, &status);
+    if(status!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
+    }
+
+    /* NULL buffer with a non-zero capacity: only the error code is checked. */
+    status=U_ZERO_ERROR;
+    length=unorm2_getDecomposition(fcc, 0xac01, NULL, 4, &status);
+    if(status!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
+    }
+}
+
+/*
+ * Checks unorm2_getRawDecomposition() on the instance returned by
+ * unorm2_getNFKCInstance().
+ *
+ * Expectations encoded below:
+ * - U+0020: no failure, and a negative return value.
+ * - U+00E4: <0061 0308>.
+ * - U+1E08: <00C7 0301>.
+ * - U+212B: <00C5>.
+ * - U+AC00: <1100 1161>.
+ * - U+AC01: <AC00 11A8>.
+ * - NULL buffer with capacity 0: U_BUFFER_OVERFLOW_ERROR, length still 2.
+ * - capacity -1, or NULL buffer with capacity 4: U_ILLEGAL_ARGUMENT_ERROR.
+ * Each successful result is also expected to be followed by a terminating 0.
+ *
+ * The failure messages name unorm2_getRawDecomposition so that a reported
+ * failure points at the function that is actually under test here.
+ */
+static void
+TestGetRawDecomposition() {
+    UChar decomp[32];
+    int32_t length;
+
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
+        return;
+    }
+    /*
+     * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
+     * without recursive decomposition.
+     */
+
+    length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
+    if(U_FAILURE(errorCode) || length>=0) {
+        log_err("unorm2_getRawDecomposition(nfkc, space) failed\n");
+    }
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
+    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
+        log_err("unorm2_getRawDecomposition(nfkc, a-umlaut) failed\n");
+    }
+    /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
+    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
+        log_err("unorm2_getRawDecomposition(nfkc, c-cedilla-acute) failed\n");
+    }
+    /* U+212B ANGSTROM SIGN */
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
+    if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
+        log_err("unorm2_getRawDecomposition(nfkc, angstrom sign) failed\n");
+    }
+    /* U+AC00: expected raw decomposition is <1100 1161>. */
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
+    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
+        log_err("unorm2_getRawDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
+    }
+    /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
+    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
+        log_err("unorm2_getRawDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
+    }
+    /* Preflighting: no buffer, zero capacity. */
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
+    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
+        log_err("unorm2_getRawDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
+    }
+    /* Negative capacity: only the error code is checked. */
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
+    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("unorm2_getRawDecomposition(nfkc, capacity<0) failed\n");
+    }
+    /* NULL buffer with a non-zero capacity: only the error code is checked. */
+    errorCode=U_ZERO_ERROR;
+    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
+    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("unorm2_getRawDecomposition(nfkc, decomposition=NULL) failed\n");
+    }
+}
+
+/*
+ * Regression test for ICU ticket #7848: when unorm2_append() reports
+ * U_BUFFER_OVERFLOW_ERROR, the contents of the first string must be
+ * unchanged (or restored), even though the end of the first string and
+ * the start of the second string interact under NFC.
+ *
+ * The test first appends with a deliberately too-small capacity and
+ * verifies both the preflight length and the untouched first string,
+ * then repeats the append with the full buffer and compares the result.
+ */
+static void
+TestAppendRestoreMiddle() {
+    /* The first string ends with 'A' + cedilla. */
+    UChar dest[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };
+    /* The second string starts with 'ring above'. */
+    static const UChar suffix[]={ 0x30A, 0x64, 0x65, 0x66, 0 };
+    /* Under NFC, U+00C5 is 'A with ring above'. */
+    static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
+    UErrorCode status=U_ZERO_ERROR;
+    int32_t resultLength;
+    const UNormalizer2 *nfc=unorm2_getNFCInstance(&status);
+
+    if(U_FAILURE(status)) {
+        log_err_status(status, "unorm2_getNFCInstance() failed: %s\n", u_errorName(status));
+        return;
+    }
+
+    /*
+     * Passing length=-1 is meant to fool the estimate of the
+     * ReorderingBuffer capacity.
+     * A capacity of 6 or 7 lets the middle sequence <41 327 30A> still fit
+     * into dest[] while the complete result overflows that capacity, so the
+     * destination buffer may get modified before an internal reallocation.
+     */
+    resultLength=unorm2_append(nfc, dest, -1, 6, suffix, -1, &status);
+    if(!(status==U_BUFFER_OVERFLOW_ERROR && resultLength==UPRV_LENGTHOF(expected))) {
+        log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)resultLength);
+        return;
+    }
+
+    /* The first string must be unchanged or restored. (ICU ticket #7848) */
+    if(!(dest[0]==0x61 && dest[1]==0x62 && dest[2]==0x63 &&
+            dest[3]==0x41 && dest[4]==0x327 && dest[5]==0)) {
+        log_err("unorm2_append(overflow) modified the first string\n");
+        return;
+    }
+
+    /* Now append for real, with the whole destination array available. */
+    status=U_ZERO_ERROR;
+    resultLength=unorm2_append(nfc, dest, -1, UPRV_LENGTHOF(dest), suffix, -1, &status);
+    if(!(U_SUCCESS(status) && resultLength==UPRV_LENGTHOF(expected) &&
+            u_memcmp(dest, expected, resultLength)==0)) {
+        log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(status), (int)resultLength);
+    }
+}
+
+/*
+ * Checks that each of the convenience getters
+ * (unorm2_getNFCInstance, unorm2_getNFDInstance, unorm2_getNFKCInstance,
+ * unorm2_getNFKDInstance, unorm2_getNFKCCasefoldInstance) returns an
+ * instance that normalizes a short probe string to the output expected
+ * for that mode.
+ *
+ * If a getter fails, the failure is logged and the remaining checks are
+ * skipped.
+ */
+static void
+TestGetEasyToUseInstance() {
+    static const UChar input[]={
+        0xA0,  /* -> <noBreak> 0020 */
+        0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
+    };
+    UChar result[32];
+    int32_t resultLength;
+    UErrorCode status=U_ZERO_ERROR;
+    const UNormalizer2 *norm;
+
+    /* NFC: U+00A0 stays, <00C7 0301> becomes U+1E08. */
+    norm=unorm2_getNFCInstance(&status);
+    if(U_FAILURE(status)) {
+        log_err_status(status, "unorm2_getNFCInstance() failed: %s\n", u_errorName(status));
+        return;
+    }
+    resultLength=unorm2_normalize(norm, input, UPRV_LENGTHOF(input), result, UPRV_LENGTHOF(result), &status);
+    if(!(U_SUCCESS(status) && resultLength==2 && result[0]==0xa0 && result[1]==0x1e08)) {
+        log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
+                (int)resultLength, u_errorName(status));
+    }
+
+    /* NFD: U+00A0 stays, the rest becomes <0043 0327 0301>. */
+    status=U_ZERO_ERROR;
+    norm=unorm2_getNFDInstance(&status);
+    if(U_FAILURE(status)) {
+        log_err_status(status, "unorm2_getNFDInstance() failed: %s\n", u_errorName(status));
+        return;
+    }
+    resultLength=unorm2_normalize(norm, input, UPRV_LENGTHOF(input), result, UPRV_LENGTHOF(result), &status);
+    if(!(U_SUCCESS(status) && resultLength==4 &&
+            result[0]==0xa0 && result[1]==0x43 && result[2]==0x327 && result[3]==0x301)) {
+        log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
+                (int)resultLength, u_errorName(status));
+    }
+
+    /* NFKC: U+00A0 becomes U+0020, the rest becomes U+1E08. */
+    status=U_ZERO_ERROR;
+    norm=unorm2_getNFKCInstance(&status);
+    if(U_FAILURE(status)) {
+        log_err_status(status, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(status));
+        return;
+    }
+    resultLength=unorm2_normalize(norm, input, UPRV_LENGTHOF(input), result, UPRV_LENGTHOF(result), &status);
+    if(!(U_SUCCESS(status) && resultLength==2 && result[0]==0x20 && result[1]==0x1e08)) {
+        log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
+                (int)resultLength, u_errorName(status));
+    }
+
+    /* NFKD: U+00A0 becomes U+0020, the rest becomes <0043 0327 0301>. */
+    status=U_ZERO_ERROR;
+    norm=unorm2_getNFKDInstance(&status);
+    if(U_FAILURE(status)) {
+        log_err_status(status, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(status));
+        return;
+    }
+    resultLength=unorm2_normalize(norm, input, UPRV_LENGTHOF(input), result, UPRV_LENGTHOF(result), &status);
+    if(!(U_SUCCESS(status) && resultLength==4 &&
+            result[0]==0x20 && result[1]==0x43 && result[2]==0x327 && result[3]==0x301)) {
+        log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
+                (int)resultLength, u_errorName(status));
+    }
+
+    /* NFKC_Casefold: U+00A0 becomes U+0020, the rest becomes U+1E09. */
+    status=U_ZERO_ERROR;
+    norm=unorm2_getNFKCCasefoldInstance(&status);
+    if(U_FAILURE(status)) {
+        log_err_status(status, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(status));
+        return;
+    }
+    resultLength=unorm2_normalize(norm, input, UPRV_LENGTHOF(input), result, UPRV_LENGTHOF(result), &status);
+    if(!(U_SUCCESS(status) && resultLength==2 && result[0]==0x20 && result[1]==0x1e09)) {
+        log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
+                (int)resultLength, u_errorName(status));
+    }
+}
+