ICU-551.41.tar.gz

[apple/icu.git] / icuSources / test / intltest / itrbnf.cpp
diff --git a/icuSources/test/intltest/itrbnf.cpp b/icuSources/test/intltest/itrbnf.cpp

index e757a36a73b63b9b2e3e46e530861a1b59922cca..ee416d452115bf1d7601fecfd7b315a83183db55 100644 (file)
--- a/icuSources/test/intltest/itrbnf.cpp
+++ b/icuSources/test/intltest/itrbnf.cpp
@@ -1,6 +1,6 @@
  /*
   *******************************************************************************
- * Copyright (C) 1996-2013, International Business Machines Corporation and    *
+ * Copyright (C) 1996-2015, International Business Machines Corporation and    *
   * others. All Rights Reserved.                                                *
   *******************************************************************************
   */
@@ -21,8 +21,6 @@
  #include "unicode/udata.h"
  #include "testutil.h"
  
-//#include "llong.h"
-
  #include <string.h>
  
  // import com.ibm.text.RuleBasedNumberFormat;
@@ -67,6 +65,8 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
          TESTCASE(17, TestPortugueseSpellout);
          TESTCASE(18, TestMultiplierSubstitution);
          TESTCASE(19, TestSetDecimalFormatSymbols);
+        TESTCASE(20, TestPluralRules);
+        TESTCASE(21, TestMultiplePluralRules);
  #else
          TESTCASE(0, TestRBNFDisabled);
  #endif
@@ -384,6 +384,51 @@ IntlTestRBNF::TestAPI() {
    delete formatter;
  }
  
+/**
+ * Perform a simple spot check that parsing does not go into an infinite loop for alternate rules.
+ */
+void IntlTestRBNF::TestMultiplePluralRules() {
+    // This is trying to model the feminine form, but don't worry about the details too much.
+    // We're trying to test the plural rules where there are different prefixes.
+    UnicodeString rules("%spellout-cardinal-feminine-genitive:"
+                "0: zero;"
+                "1: ono;"
+                "1000: << $(cardinal,one{thousand}few{thousanF}other{thousanO})$[ >>];"
+                "%spellout-cardinal-feminine:"
+                "0: zero;"
+                "1: one;"
+                "1000: << $(cardinal,one{thousand}few{thousanF}other{thousanO})$[ >>];");
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError pError;
+    RuleBasedNumberFormat formatter(rules, Locale("ru"), pError, status);
+    Formattable result;
+    UnicodeString resultStr;
+    FieldPosition pos;
+
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create formatter - %s", u_errorName(status));
+        return;
+    }
+
+    formatter.parse(formatter.format(1000.0, resultStr, pos, status), result, status);
+    if (1000 != result.getLong() || resultStr != UNICODE_STRING_SIMPLE("one thousand")) {
+        errln("RuleBasedNumberFormat did not return the correct value. Got: %d", result.getLong());
+        errln(resultStr);
+    }
+    resultStr.remove();
+    formatter.parse(formatter.format(1000.0, UnicodeString("%spellout-cardinal-feminine-genitive"), resultStr, pos, status), result, status);
+    if (1000 != result.getLong() || resultStr != UNICODE_STRING_SIMPLE("ono thousand")) {
+        errln("RuleBasedNumberFormat(cardinal-feminine-genitive) did not return the correct value. Got: %d", result.getLong());
+        errln(resultStr);
+    }
+    resultStr.remove();
+    formatter.parse(formatter.format(1000.0, UnicodeString("%spellout-cardinal-feminine"), resultStr, pos, status), result, status);
+    if (1000 != result.getLong() || resultStr != UNICODE_STRING_SIMPLE("one thousand")) {
+        errln("RuleBasedNumberFormat(spellout-cardinal-feminine) did not return the correct value. Got: %d", result.getLong());
+        errln(resultStr);
+    }
+}
+
  void IntlTestRBNF::TestFractionalRuleSet()
  {
      UnicodeString fracRules(
@@ -1843,34 +1888,25 @@ IntlTestRBNF::TestAllLocales()
  {
      const char* names[] = {
          " (spellout) ",
-        " (ordinal)  ",
-        " (duration) "
+        " (ordinal)  "
+        // " (duration) " // This is English only, and it's not really supported in CLDR anymore.
      };
      double numbers[] = {45.678, 1, 2, 10, 11, 100, 110, 200, 1000, 1111, -1111};
  
-    // RBNF parse is extremely slow when lenient option is enabled.
-    // For non-exhaustive mode, we only test a few locales.
-    const char* parseLocales[] = {"en_US", "nl_NL", "be", NULL};
-
-
      int32_t count = 0;
      const Locale* locales = Locale::getAvailableLocales(count);
      for (int i = 0; i < count; ++i) {
          const Locale* loc = &locales[i];
-        UBool testParse = TRUE;
-        if (quick) {
-            testParse = FALSE;
-            for (int k = 0; parseLocales[k] != NULL; k++) {
-                if (strcmp(loc->getLanguage(), parseLocales[k]) == 0) {
-                    testParse = TRUE;
-                    break;
-                }
-            }
-        }
  
-        for (int j = 0; j < 3; ++j) {
+        for (int j = 0; j < 2; ++j) {
              UErrorCode status = U_ZERO_ERROR;
              RuleBasedNumberFormat* f = new RuleBasedNumberFormat((URBNFRuleSetTag)j, *loc, status);
+
+            if (status == U_USING_DEFAULT_WARNING || status == U_USING_FALLBACK_WARNING) {
+                // The rules were inherited from a fallback or default locale, so skip this locale.
+                delete f;
+                break;
+            }
              if (U_FAILURE(status)) {
                  errln(UnicodeString(loc->getName()) + names[j]
                      + "ERROR could not instantiate -> " + u_errorName(status));
@@ -1882,40 +1918,58 @@ IntlTestRBNF::TestAllLocales()
                  UnicodeString str;
                  f->format(n, str);
  
-                logln(UnicodeString(loc->getName()) + names[j]
-                    + "success: " + n + " -> " + str);
+                if (verbose) {
+                    logln(UnicodeString(loc->getName()) + names[j]
+                        + "success: " + n + " -> " + str);
+                }
  
-                if (testParse) {
-                    // We do not validate the result in this test case,
-                    // because there are cases which do not round trip by design.
-                    Formattable num;
+                // We do not validate the result in this test case,
+                // because there are cases which do not round trip by design.
+                Formattable num;
  
-                    // regular parse
-                    status = U_ZERO_ERROR;
-                    f->setLenient(FALSE);
-                    f->parse(str, num, status);
-                    if (U_FAILURE(status)) {
-                        //TODO: We need to fix parse problems - see #6895 / #6896
-                        if (status == U_INVALID_FORMAT_ERROR) {
-                            logln(UnicodeString(loc->getName()) + names[j]
-                                + "WARNING could not parse '" + str + "' -> " + u_errorName(status));
-                        } else {
-                             errln(UnicodeString(loc->getName()) + names[j]
-                                + "ERROR could not parse '" + str + "' -> " + u_errorName(status));
-                       }
+                // regular parse
+                status = U_ZERO_ERROR;
+                f->setLenient(FALSE);
+                f->parse(str, num, status);
+                if (U_FAILURE(status)) {
+                    errln(UnicodeString(loc->getName()) + names[j]
+                        + "ERROR could not parse '" + str + "' -> " + u_errorName(status));
+                }
+                // We only check the spellout. The behavior is undefined for numbers < 1 and fractional numbers.
+                if (j == 0) {
+                    if (num.getType() == Formattable::kLong && num.getLong() != n) {
+                        errln(UnicodeString(loc->getName()) + names[j]
+                            + UnicodeString("ERROR could not roundtrip ") + n
+                            + UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getLong());
                      }
+                    else if (num.getType() == Formattable::kDouble && (int64_t)(num.getDouble() * 1000) != (int64_t)(n*1000)) {
+                        // The epsilon difference is too high.
+                        errln(UnicodeString(loc->getName()) + names[j]
+                            + UnicodeString("ERROR could not roundtrip ") + n
+                            + UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
+                    }
+                }
+                if (!quick && !logKnownIssue("9503") ) {
                      // lenient parse
                      status = U_ZERO_ERROR;
                      f->setLenient(TRUE);
                      f->parse(str, num, status);
                      if (U_FAILURE(status)) {
-                        //TODO: We need to fix parse problems - see #6895 / #6896
-                        if (status == U_INVALID_FORMAT_ERROR) {
-                            logln(UnicodeString(loc->getName()) + names[j]
-                                + "WARNING could not parse(lenient) '" + str + "' -> " + u_errorName(status));
-                        } else {
+                        errln(UnicodeString(loc->getName()) + names[j]
+                            + "ERROR could not parse(lenient) '" + str + "' -> " + u_errorName(status));
+                    }
+                    // We only check the spellout. The behavior is undefined for numbers < 1 and fractional numbers.
+                    if (j == 0) {
+                        if (num.getType() == Formattable::kLong && num.getLong() != n) {
+                            errln(UnicodeString(loc->getName()) + names[j]
+                                + UnicodeString("ERROR could not roundtrip ") + n
+                                + UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getLong());
+                        }
+                        else if (num.getType() == Formattable::kDouble && (int64_t)(num.getDouble() * 1000) != (int64_t)(n*1000)) {
+                            // The epsilon difference is too high.
                              errln(UnicodeString(loc->getName()) + names[j]
-                                + "ERROR could not parse(lenient) '" + str + "' -> " + u_errorName(status));
+                                + UnicodeString("ERROR could not roundtrip ") + n
+                                + UnicodeString(" -> ") + str + UnicodeString(" -> ") + num.getDouble());
                          }
                      }
                  }
@@ -1928,27 +1982,27 @@ IntlTestRBNF::TestAllLocales()
  
  void 
  IntlTestRBNF::TestMultiplierSubstitution(void) {
-  UnicodeString rules("=#,##0=;1,000,000: <##0.###< million;");
-  UErrorCode status = U_ZERO_ERROR;
-  UParseError parse_error;
-  RuleBasedNumberFormat *rbnf = 
-    new RuleBasedNumberFormat(rules, Locale::getUS(), parse_error, status);
-  if (U_SUCCESS(status)) {
-    UnicodeString res;
-    FieldPosition pos;
-    double n = 1234000.0;
-    rbnf->format(n, res, pos);
-    delete rbnf;
-
-    UnicodeString expected = UNICODE_STRING_SIMPLE("1.234 million");
-    if (expected != res) {
-      UnicodeString msg = "Expected: ";
-      msg.append(expected);
-      msg.append(" but got ");
-      msg.append(res);
-      errln(msg);
+    UnicodeString rules("=#,##0=;1,000,000: <##0.###< million;");
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parse_error;
+    RuleBasedNumberFormat *rbnf = 
+        new RuleBasedNumberFormat(rules, Locale::getUS(), parse_error, status);
+    if (U_SUCCESS(status)) {
+        UnicodeString res;
+        FieldPosition pos;
+        double n = 1234000.0;
+        rbnf->format(n, res, pos);
+        delete rbnf;
+
+        UnicodeString expected(UNICODE_STRING_SIMPLE("1.234 million"));
+        if (expected != res) {
+            UnicodeString msg = "Expected: ";
+            msg.append(expected);
+            msg.append(" but got ");
+            msg.append(res);
+            errln(msg);
+        }
      }
-  }
  }
  
  void
@@ -1993,6 +2047,106 @@ IntlTestRBNF::TestSetDecimalFormatSymbols() {
      }
  }
  
+void IntlTestRBNF::TestPluralRules() {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString enRules("%digits-ordinal:-x: ->>;0: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;");
+    UParseError parseError;
+    RuleBasedNumberFormat enFormatter(enRules, Locale::getEnglish(), parseError, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create RuleBasedNumberFormat - " + UnicodeString(u_errorName(status)));
+        return;
+    }
+    const char* const enTestData[][2] = {
+            { "1", "1st" },
+            { "2", "2nd" },
+            { "3", "3rd" },
+            { "4", "4th" },
+            { "11", "11th" },
+            { "12", "12th" },
+            { "13", "13th" },
+            { "14", "14th" },
+            { "21", "21st" },
+            { "22", "22nd" },
+            { "23", "23rd" },
+            { "24", "24th" },
+            { NULL, NULL }
+    };
+
+    doTest(&enFormatter, enTestData, TRUE);
+
+    // This is trying to model the feminine form, but don't worry about the details too much.
+    // We're trying to test the plural rules.
+    UnicodeString ruRules("%spellout-numbering:"
+            "-x: minus >>;"
+            "x.x: << point >>;"
+            "0: zero;"
+            "1: one;"
+            "2: two;"
+            "3: three;"
+            "4: four;"
+            "5: five;"
+            "6: six;"
+            "7: seven;"
+            "8: eight;"
+            "9: nine;"
+            "10: ten;"
+            "11: eleven;"
+            "12: twelve;"
+            "13: thirteen;"
+            "14: fourteen;"
+            "15: fifteen;"
+            "16: sixteen;"
+            "17: seventeen;"
+            "18: eighteen;"
+            "19: nineteen;"
+            "20: twenty[->>];"
+            "30: thirty[->>];"
+            "40: forty[->>];"
+            "50: fifty[->>];"
+            "60: sixty[->>];"
+            "70: seventy[->>];"
+            "80: eighty[->>];"
+            "90: ninety[->>];"
+            "100: hundred[ >>];"
+            "200: << hundred[ >>];"
+            "300: << hundreds[ >>];"
+            "500: << hundredss[ >>];"
+            "1000: << $(cardinal,one{thousand}few{thousands}other{thousandss})$[ >>];"
+            "1000000: << $(cardinal,one{million}few{millions}other{millionss})$[ >>];");
+    RuleBasedNumberFormat ruFormatter(ruRules, Locale("ru"), parseError, status);
+    const char* const ruTestData[][2] = {
+            { "1", "one" },
+            { "100", "hundred" },
+            { "125", "hundred twenty-five" },
+            { "399", "three hundreds ninety-nine" },
+            { "1,000", "one thousand" },
+            { "1,001", "one thousand one" },
+            { "2,000", "two thousands" },
+            { "2,001", "two thousands one" },
+            { "2,002", "two thousands two" },
+            { "3,333", "three thousands three hundreds thirty-three" },
+            { "5,000", "five thousandss" },
+            { "11,000", "eleven thousandss" },
+            { "21,000", "twenty-one thousand" },
+            { "22,000", "twenty-two thousands" },
+            { "25,001", "twenty-five thousandss one" },
+            { NULL, NULL }
+    };
+
+    if (U_FAILURE(status)) {
+        errln("Unable to create RuleBasedNumberFormat - " + UnicodeString(u_errorName(status)));
+        return;
+    }
+    doTest(&ruFormatter, ruTestData, TRUE);
+
+    // Make sure there are no divide by 0 errors.
+    UnicodeString result;
+    RuleBasedNumberFormat(ruRules, Locale("ru"), parseError, status).format(21000, result);
+    if (result.compare(UNICODE_STRING_SIMPLE("twenty-one thousand")) != 0) {
+        errln("Got " + result + " for 21000");
+    }
+
+}
  
  void 
  IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing)