ICU-531.30.tar.gz

[apple/icu.git] / icuSources / i18n / msgfmt.cpp
diff --git a/icuSources/i18n/msgfmt.cpp b/icuSources/i18n/msgfmt.cpp

index 3c2c7c65a1b91213e578b55c9324d2f75284af09..f9b353fcbe114a6393b549bb40cd168d27d382cc 100644 (file)
--- a/icuSources/i18n/msgfmt.cpp
+++ b/icuSources/i18n/msgfmt.cpp
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT:
- * Copyright (c) 1997-2012, International Business Machines Corporation and
+ * Copyright (c) 1997-2013, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************
   *
@@ -39,6 +39,7 @@
  #include "patternprops.h"
  #include "messageimpl.h"
  #include "msgfmt_impl.h"
+#include "plurrule_impl.h"
  #include "uassert.h"
  #include "uelement.h"
  #include "uhash.h"
@@ -203,6 +204,16 @@ public:
              append(s);
          }
      }
+    void formatAndAppend(const Format* formatter, const Formattable& arg,
+                         const UnicodeString &argString, UErrorCode& ec) {
+        if (!argString.isEmpty()) {
+            if (U_SUCCESS(ec)) {
+                append(argString);
+            }
+        } else {
+            formatAndAppend(formatter, arg, ec);
+        }
+    }
      int32_t length() {
          return len;
      }
@@ -229,8 +240,8 @@ MessageFormat::MessageFormat(const UnicodeString& pattern,
    defaultDateFormat(NULL),
    cachedFormatters(NULL),
    customFormatArgStarts(NULL),
-  pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
-  ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
+  pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
+  ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  {
      setLocaleIDs(fLocale.getName(), fLocale.getName());
      applyPattern(pattern, success);
@@ -251,8 +262,8 @@ MessageFormat::MessageFormat(const UnicodeString& pattern,
    defaultDateFormat(NULL),
    cachedFormatters(NULL),
    customFormatArgStarts(NULL),
-  pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
-  ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
+  pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
+  ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  {
      setLocaleIDs(fLocale.getName(), fLocale.getName());
      applyPattern(pattern, success);
@@ -274,8 +285,8 @@ MessageFormat::MessageFormat(const UnicodeString& pattern,
    defaultDateFormat(NULL),
    cachedFormatters(NULL),
    customFormatArgStarts(NULL),
-  pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
-  ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
+  pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
+  ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  {
      setLocaleIDs(fLocale.getName(), fLocale.getName());
      applyPattern(pattern, parseError, success);
@@ -296,8 +307,8 @@ MessageFormat::MessageFormat(const MessageFormat& that)
    defaultDateFormat(NULL),
    cachedFormatters(NULL),
    customFormatArgStarts(NULL),
-  pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
-  ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
+  pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
+  ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  {
      // This will take care of creating the hash tables (since they are NULL).
      UErrorCode ec = U_ZERO_ERROR;
@@ -440,8 +451,8 @@ MessageFormat::setLocale(const Locale& theLocale)
          defaultDateFormat = NULL;
          fLocale = theLocale;
          setLocaleIDs(fLocale.getName(), fLocale.getName());
-        pluralProvider.reset(&fLocale);
-        ordinalProvider.reset(&fLocale);
+        pluralProvider.reset();
+        ordinalProvider.reset();
      }
  }
  
@@ -537,6 +548,7 @@ void MessageFormat::setArgStartFormat(int32_t argStart,
                                        UErrorCode& status) {
      if (U_FAILURE(status)) {
          delete formatter;
+        return;
      }
      if (cachedFormatters == NULL) {
          cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
@@ -830,12 +842,7 @@ MessageFormat::getFormats(int32_t& cnt) const
  
  UnicodeString MessageFormat::getArgName(int32_t partIndex) {
      const MessagePattern::Part& part = msgPattern.getPart(partIndex);
-    if (part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
-        return msgPattern.getSubstring(part);
-    } else {
-        UnicodeString temp;
-        return itos(part.getValue(), temp);
-    }
+    return msgPattern.getSubstring(part);
  }
  
  StringEnumeration*
@@ -944,13 +951,55 @@ MessageFormat::format(const Formattable* arguments,
  
      UnicodeStringAppendable usapp(appendTo);
      AppendableWrapper app(usapp);
-    format(0, 0.0, arguments, argumentNames, cnt, app, pos, status);
+    format(0, NULL, arguments, argumentNames, cnt, app, pos, status);
      return appendTo;
  }
  
+namespace {
+
+/**
+ * Mutable input/output values for the PluralSelectorProvider.
+ * Separate so that it is possible to make MessageFormat Freezable.
+ */
+class PluralSelectorContext {
+public:
+    PluralSelectorContext(int32_t start, const UnicodeString &name,
+                          const Formattable &num, double off, UErrorCode &errorCode)
+            : startIndex(start), argName(name), offset(off),
+              numberArgIndex(-1), formatter(NULL), forReplaceNumber(FALSE) {
+        // number needs to be set even when select() is not called.
+        // Keep it as a Number/Formattable:
+        // For format() methods, and to preserve information (e.g., BigDecimal).
+        if(off == 0) {
+            number = num;
+        } else {
+            number = num.getDouble(errorCode) - off;
+        }
+    }
+
+    // Input values for plural selection with decimals.
+    int32_t startIndex;
+    const UnicodeString &argName;
+    /** argument number - plural offset */
+    Formattable number;
+    double offset;
+    // Output values for plural selection with decimals.
+    /** -1 if REPLACE_NUMBER, 0 arg not found, >0 ARG_START index */
+    int32_t numberArgIndex;
+    const Format *formatter;
+    /** formatted argument number - plural offset */
+    UnicodeString numberString;
+    /** TRUE if number-offset was formatted with the stock number formatter */
+    UBool forReplaceNumber;
+};
+
+}  // namespace
+
  // if argumentNames is NULL, this means arguments is a numeric array.
  // arguments can not be NULL.
-void MessageFormat::format(int32_t msgStart, double pluralNumber,
+// We use const void *plNumber rather than const PluralSelectorContext *pluralNumber
+// so that we need not declare the PluralSelectorContext in the public header file.
+void MessageFormat::format(int32_t msgStart, const void *plNumber,
                             const Formattable* arguments,
                             const UnicodeString *argumentNames,
                             int32_t cnt,
@@ -973,8 +1022,16 @@ void MessageFormat::format(int32_t msgStart, double pluralNumber,
          }
          prevIndex = part->getLimit();
          if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
-            const NumberFormat* nf = getDefaultNumberFormat(success);
-            appendTo.formatAndAppend(nf, Formattable(pluralNumber), success);
+            const PluralSelectorContext &pluralNumber =
+                *static_cast<const PluralSelectorContext *>(plNumber);
+            if(pluralNumber.forReplaceNumber) {
+                // number-offset was already formatted.
+                appendTo.formatAndAppend(pluralNumber.formatter,
+                        pluralNumber.number, pluralNumber.numberString, success);
+            } else {
+                const NumberFormat* nf = getDefaultNumberFormat(success);
+                appendTo.formatAndAppend(nf, pluralNumber.number, success);
+            }
              continue;
          }
          if (type != UMSGPAT_PART_TYPE_ARG_START) {
@@ -984,38 +1041,43 @@ void MessageFormat::format(int32_t msgStart, double pluralNumber,
          UMessagePatternArgType argType = part->getArgType();
          part = &msgPattern.getPart(++i);
          const Formattable* arg;
-        UnicodeString noArg;
+        UBool noArg = FALSE;
+        UnicodeString argName = msgPattern.getSubstring(*part);
          if (argumentNames == NULL) {
              int32_t argNumber = part->getValue();  // ARG_NUMBER
              if (0 <= argNumber && argNumber < cnt) {
                  arg = arguments + argNumber;
              } else {
                  arg = NULL;
-                noArg.append(LEFT_CURLY_BRACE);
-                itos(argNumber, noArg);
-                noArg.append(RIGHT_CURLY_BRACE);
+                noArg = TRUE;
              }
          } else {
-            UnicodeString key;
-            if (part->getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
-                key = msgPattern.getSubstring(*part);
-            } else /* UMSGPAT_PART_TYPE_ARG_NUMBER */ {
-                itos(part->getValue(), key);
-            }
-            arg = getArgFromListByName(arguments, argumentNames, cnt, key);
+            arg = getArgFromListByName(arguments, argumentNames, cnt, argName);
              if (arg == NULL) {
-                noArg.append(LEFT_CURLY_BRACE);
-                noArg.append(key);
-                noArg.append(RIGHT_CURLY_BRACE);
+                noArg = TRUE;
              }
          }
          ++i;
          int32_t prevDestLength = appendTo.length();
          const Format* formatter = NULL;
-        if (!noArg.isEmpty()) {
-            appendTo.append(noArg);
+        if (noArg) {
+            appendTo.append(
+                UnicodeString(LEFT_CURLY_BRACE).append(argName).append(RIGHT_CURLY_BRACE));
          } else if (arg == NULL) {
              appendTo.append(NULL_STRING, 4);
+        } else if(plNumber!=NULL &&
+                static_cast<const PluralSelectorContext *>(plNumber)->numberArgIndex==(i-2)) {
+            const PluralSelectorContext &pluralNumber =
+                *static_cast<const PluralSelectorContext *>(plNumber);
+            if(pluralNumber.offset == 0) {
+                // The number was already formatted with this formatter.
+                appendTo.formatAndAppend(pluralNumber.formatter, pluralNumber.number,
+                                         pluralNumber.numberString, success);
+            } else {
+                // Do not use the formatted (number-offset) string for a named argument
+                // that formats the number without subtracting the offset.
+                appendTo.formatAndAppend(pluralNumber.formatter, *arg, success);
+            }
          } else if ((formatter = getCachedFormatter(i -2))) {
              // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings.
              if (dynamic_cast<const ChoiceFormat*>(formatter) ||
@@ -1030,7 +1092,7 @@ void MessageFormat::format(int32_t msgStart, double pluralNumber,
                      (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern))
                  ) {
                      MessageFormat subMsgFormat(subMsgString, fLocale, success);
-                    subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, ignore, success);
+                    subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, ignore, success);
                  } else {
                      appendTo.append(subMsgString);
                  }
@@ -1059,26 +1121,26 @@ void MessageFormat::format(int32_t msgStart, double pluralNumber,
              // because only this one converts non-double numeric types to double.
              const double number = arg->getDouble(success);
              int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number);
-            formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames,
+            formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames,
                                      cnt, appendTo, success);
          } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) {
              if (!arg->isNumeric()) {
                  success = U_ILLEGAL_ARGUMENT_ERROR;
                  return;
              }
-            const PluralFormat::PluralSelector &selector =
+            const PluralSelectorProvider &selector =
                  argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider;
              // We must use the Formattable::getDouble() variant with the UErrorCode parameter
              // because only this one converts non-double numeric types to double.
-            double number = arg->getDouble(success);
-            int32_t subMsgStart = PluralFormat::findSubMessage(msgPattern, i, selector, number,
-                                                               success);
              double offset = msgPattern.getPluralOffset(i);
-            formatComplexSubMessage(subMsgStart, number-offset, arguments, argumentNames,
+            PluralSelectorContext context(i, argName, *arg, offset, success);
+            int32_t subMsgStart = PluralFormat::findSubMessage(
+                    msgPattern, i, selector, &context, arg->getDouble(success), success);
+            formatComplexSubMessage(subMsgStart, &context, arguments, argumentNames,
                                      cnt, appendTo, success);
          } else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
              int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success);
-            formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames,
+            formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames,
                                      cnt, appendTo, success);
          } else {
              // This should never happen.
@@ -1093,7 +1155,7 @@ void MessageFormat::format(int32_t msgStart, double pluralNumber,
  
  
  void MessageFormat::formatComplexSubMessage(int32_t msgStart,
-                                            double pluralNumber,
+                                            const void *plNumber,
                                              const Formattable* arguments,
                                              const UnicodeString *argumentNames,
                                              int32_t cnt,
@@ -1104,7 +1166,7 @@ void MessageFormat::formatComplexSubMessage(int32_t msgStart,
      }
  
      if (!MessageImpl::jdkAposMode(msgPattern)) {
-        format(msgStart, pluralNumber, arguments, argumentNames, cnt, appendTo, NULL, success);
+        format(msgStart, plNumber, arguments, argumentNames, cnt, appendTo, NULL, success);
          return;
      }
  
@@ -1126,8 +1188,15 @@ void MessageFormat::formatComplexSubMessage(int32_t msgStart,
          } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
              sb.append(msgString, prevIndex, index - prevIndex);
              if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
-                const NumberFormat* nf = getDefaultNumberFormat(success);
-                sb.append(nf->format(pluralNumber, sb, success));
+                const PluralSelectorContext &pluralNumber =
+                    *static_cast<const PluralSelectorContext *>(plNumber);
+                if(pluralNumber.forReplaceNumber) {
+                    // number-offset was already formatted.
+                    sb.append(pluralNumber.numberString);
+                } else {
+                    const NumberFormat* nf = getDefaultNumberFormat(success);
+                    sb.append(nf->format(pluralNumber.number, sb, success));
+                }
              }
              prevIndex = part.getLimit();
          } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
@@ -1143,7 +1212,7 @@ void MessageFormat::formatComplexSubMessage(int32_t msgStart,
          UnicodeString emptyPattern;  // gcc 3.3.3 fails with "UnicodeString()" as the first parameter.
          MessageFormat subMsgFormat(emptyPattern, fLocale, success);
          subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success);
-        subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, NULL, success);
+        subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, NULL, success);
      } else {
          appendTo.append(sb);
      }
@@ -1183,6 +1252,59 @@ FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_
      */
  }
  
+int32_t
+MessageFormat::findOtherSubMessage(int32_t partIndex) const {
+    int32_t count=msgPattern.countParts();
+    const MessagePattern::Part *part = &msgPattern.getPart(partIndex);
+    if(MessagePattern::Part::hasNumericValue(part->getType())) {
+        ++partIndex;
+    }
+    // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
+    // until ARG_LIMIT or end of plural-only pattern.
+    UnicodeString other(FALSE, OTHER_STRING, 5);
+    do {
+        part=&msgPattern.getPart(partIndex++);
+        UMessagePatternPartType type=part->getType();
+        if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
+            break;
+        }
+        U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
+        // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
+        if(msgPattern.partSubstringMatches(*part, other)) {
+            return partIndex;
+        }
+        if(MessagePattern::Part::hasNumericValue(msgPattern.getPartType(partIndex))) {
+            ++partIndex;  // skip the numeric-value part of "=1" etc.
+        }
+        partIndex=msgPattern.getLimitPartIndex(partIndex);
+    } while(++partIndex<count);
+    return 0;
+}
+
+int32_t
+MessageFormat::findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const {
+    for(int32_t i=msgStart+1;; ++i) {
+        const MessagePattern::Part &part=msgPattern.getPart(i);
+        UMessagePatternPartType type=part.getType();
+        if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
+            return 0;
+        }
+        if(type==UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
+            return -1;
+        }
+        if(type==UMSGPAT_PART_TYPE_ARG_START) {
+            UMessagePatternArgType argType=part.getArgType();
+            if(!argName.isEmpty() && (argType==UMSGPAT_ARG_TYPE_NONE || argType==UMSGPAT_ARG_TYPE_SIMPLE)) {
+                // ARG_NUMBER or ARG_NAME
+                if(msgPattern.partSubstringMatches(msgPattern.getPart(i+1), argName)) {
+                    return i;
+                }
+            }
+            i=msgPattern.getLimitPartIndex(i);
+        }
+    }
+}
+
  void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) {
      // Deep copy pointer fields.
      // We need not copy the formatAliases because they are re-filled
@@ -1796,32 +1918,55 @@ FormatNameEnumeration::~FormatNameEnumeration() {
      delete fFormatNames;
  }
  
-
-MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const Locale* loc, UPluralType t)
-        : locale(loc), rules(NULL), type(t) {
+MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat &mf, UPluralType t)
+        : msgFormat(mf), rules(NULL), type(t) {
  }
  
  MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() {
-    // We own the rules but not the locale.
      delete rules;
  }
  
-UnicodeString MessageFormat::PluralSelectorProvider::select(double number, UErrorCode& ec) const {
+UnicodeString MessageFormat::PluralSelectorProvider::select(void *ctx, double number,
+                                                            UErrorCode& ec) const {
      if (U_FAILURE(ec)) {
          return UnicodeString(FALSE, OTHER_STRING, 5);
      }
      MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this);
      if(rules == NULL) {
-        t->rules = PluralRules::forLocale(*locale, type, ec);
+        t->rules = PluralRules::forLocale(msgFormat.fLocale, type, ec);
          if (U_FAILURE(ec)) {
              return UnicodeString(FALSE, OTHER_STRING, 5);
          }
      }
-    return rules->select(number);
+    // Select a sub-message according to how the number is formatted,
+    // which is specified in the selected sub-message.
+    // We avoid this circle by looking at how
+    // the number is formatted in the "other" sub-message
+    // which must always be present and usually contains the number.
+    // Message authors should be consistent across sub-messages.
+    PluralSelectorContext &context = *static_cast<PluralSelectorContext *>(ctx);
+    int32_t otherIndex = msgFormat.findOtherSubMessage(context.startIndex);
+    context.numberArgIndex = msgFormat.findFirstPluralNumberArg(otherIndex, context.argName);
+    if(context.numberArgIndex > 0 && msgFormat.cachedFormatters != NULL) {
+        context.formatter =
+            (const Format*)uhash_iget(msgFormat.cachedFormatters, context.numberArgIndex);
+    }
+    if(context.formatter == NULL) {
+        context.formatter = msgFormat.getDefaultNumberFormat(ec);
+        context.forReplaceNumber = TRUE;
+    }
+    U_ASSERT(context.number.getDouble(ec) == number);  // argument number minus the offset
+    context.formatter->format(context.number, context.numberString, ec);
+    const DecimalFormat *decFmt = dynamic_cast<const DecimalFormat *>(context.formatter);
+    if(decFmt != NULL) {
+        FixedDecimal dec = decFmt->getFixedDecimal(context.number, ec);
+        return rules->select(dec);
+    } else {
+        return rules->select(number);
+    }
  }
  
-void MessageFormat::PluralSelectorProvider::reset(const Locale* loc) {
-    locale = loc;
+void MessageFormat::PluralSelectorProvider::reset() {
      delete rules;
      rules = NULL;
  }