ICU-491.11.2.tar.gz

[apple/icu.git] / icuSources / i18n / plurrule.cpp
diff --git a/icuSources/i18n/plurrule.cpp b/icuSources/i18n/plurrule.cpp

index 3f4bf23ae588950b9f98aa9a2037bc4668111e3b..ba9b9bd4488e66ba7b30c1f2adde9d2246b27a9d 100644 (file)
--- a/icuSources/i18n/plurrule.cpp
+++ b/icuSources/i18n/plurrule.cpp
@@ -1,6 +1,6 @@
  /*
  *******************************************************************************
  /*
  *******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and
+* Copyright (C) 2007-2011, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  *
  * others. All Rights Reserved.
  *******************************************************************************
  *
@@ -13,37 +13,31 @@
  */
  
  
  */
  
  
-#include "unicode/uniset.h"
  #include "unicode/utypes.h"
  #include "unicode/utypes.h"
-#include "unicode/ures.h"
+#include "unicode/localpointer.h"
  #include "unicode/plurrule.h"
  #include "unicode/plurrule.h"
+#include "unicode/ures.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "hash.h"
  #include "mutex.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "hash.h"
  #include "mutex.h"
+#include "patternprops.h"
  #include "plurrule_impl.h"
  #include "putilimp.h"
  #include "ucln_in.h"
  #include "ustrfmt.h"
  #include "locutil.h"
  #include "plurrule_impl.h"
  #include "putilimp.h"
  #include "ucln_in.h"
  #include "ustrfmt.h"
  #include "locutil.h"
-
-/*
-// TODO(claireho): remove stdio
-#include "stdio.h"
-*/
+#include "uassert.h"
  
  #if !UCONFIG_NO_FORMATTING
  
  U_NAMESPACE_BEGIN
  
  
  #if !UCONFIG_NO_FORMATTING
  
  U_NAMESPACE_BEGIN
  
+// shared by all instances when lazy-initializing samples
+static UMTX pluralMutex;
  
  #define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])
  
  
  #define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])
  
-static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
-static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
-static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
-static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
-static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
  static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
  static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
  static const UChar PK_IN[]={LOW_I,LOW_N,0};
  static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
  static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
  static const UChar PK_IN[]={LOW_I,LOW_N,0};
@@ -61,8 +55,15 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
  PluralRules::PluralRules(UErrorCode& status)
  :   UObject(),
      mRules(NULL),
  PluralRules::PluralRules(UErrorCode& status)
  :   UObject(),
      mRules(NULL),
-    mParser(new RuleParser())
+    mParser(NULL),
+    mSamples(NULL),
+    mSampleInfo(NULL),
+    mSampleInfoCount(0)
  {
  {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    mParser = new RuleParser();
      if (mParser==NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
      }
      if (mParser==NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
      }
@@ -71,7 +72,10 @@ PluralRules::PluralRules(UErrorCode& status)
  PluralRules::PluralRules(const PluralRules& other)
  : UObject(other),
      mRules(NULL),
  PluralRules::PluralRules(const PluralRules& other)
  : UObject(other),
      mRules(NULL),
-    mParser(new RuleParser())
+  mParser(NULL),
+  mSamples(NULL),
+  mSampleInfo(NULL),
+  mSampleInfoCount(0)
  {
      *this=other;
  }
  {
      *this=other;
  }
@@ -79,6 +83,8 @@ PluralRules::PluralRules(const PluralRules& other)
  PluralRules::~PluralRules() {
      delete mRules;
      delete mParser;
  PluralRules::~PluralRules() {
      delete mRules;
      delete mParser;
+    uprv_free(mSamples);
+    uprv_free(mSampleInfo);
  }
  
  PluralRules*
  }
  
  PluralRules*
@@ -98,6 +104,13 @@ PluralRules::operator=(const PluralRules& other) {
          }
          delete mParser;
          mParser = new RuleParser();
          }
          delete mParser;
          mParser = new RuleParser();
+
+        uprv_free(mSamples);
+        mSamples = NULL;
+
+        uprv_free(mSampleInfo);
+        mSampleInfo = NULL;
+        mSampleInfoCount = 0;
      }
  
      return *this;
      }
  
      return *this;
@@ -107,6 +120,9 @@ PluralRules* U_EXPORT2
  PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
      RuleChain   rules;
  
  PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
      RuleChain   rules;
  
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
      PluralRules *newRules = new PluralRules(status);
      if ( (newRules != NULL)&& U_SUCCESS(status) ) {
          newRules->parseDescription((UnicodeString &)description, rules, status);
      PluralRules *newRules = new PluralRules(status);
      if ( (newRules != NULL)&& U_SUCCESS(status) ) {
          newRules->parseDescription((UnicodeString &)description, rules, status);
@@ -125,15 +141,18 @@ PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
  
  PluralRules* U_EXPORT2
  PluralRules::createDefaultRules(UErrorCode& status) {
  
  PluralRules* U_EXPORT2
  PluralRules::createDefaultRules(UErrorCode& status) {
-    return createRules(PLURAL_DEFAULT_RULE, status);
+    return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
  }
  
  PluralRules* U_EXPORT2
  PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
      RuleChain   rChain;
  }
  
  PluralRules* U_EXPORT2
  PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
      RuleChain   rChain;
-    status = U_ZERO_ERROR;
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
      PluralRules *newObj = new PluralRules(status);
      PluralRules *newObj = new PluralRules(status);
-    if (newObj==NULL) {
+    if (newObj==NULL || U_FAILURE(status)) {
+        delete newObj;
          return NULL;
      }
      UnicodeString locRule = newObj->getRuleFromResource(locale, status);
          return NULL;
      }
      UnicodeString locRule = newObj->getRuleFromResource(locale, status);
@@ -150,14 +169,14 @@ PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
          newObj->parseDescription(defRule, rChain, status);
          newObj->addRules(rChain);
      }
          newObj->parseDescription(defRule, rChain, status);
          newObj->addRules(rChain);
      }
-    
+
      return newObj;
  }
  
  UnicodeString
  PluralRules::select(int32_t number) const {
      if (mRules == NULL) {
      return newObj;
  }
  
  UnicodeString
  PluralRules::select(int32_t number) const {
      if (mRules == NULL) {
-        return PLURAL_DEFAULT_RULE;
+        return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
      }
      else {
          return mRules->select(number);
      }
      else {
          return mRules->select(number);
@@ -167,7 +186,7 @@ PluralRules::select(int32_t number) const {
  UnicodeString
  PluralRules::select(double number) const {
      if (mRules == NULL) {
  UnicodeString
  PluralRules::select(double number) const {
      if (mRules == NULL) {
-        return PLURAL_DEFAULT_RULE;
+        return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
      }
      else {
          return mRules->select(number);
      }
      else {
          return mRules->select(number);
@@ -178,13 +197,81 @@ StringEnumeration*
  PluralRules::getKeywords(UErrorCode& status) const {
      if (U_FAILURE(status))  return NULL;
      StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
  PluralRules::getKeywords(UErrorCode& status) const {
      if (U_FAILURE(status))  return NULL;
      StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
+    if (U_FAILURE(status)) {
+      delete nameEnumerator;
+      return NULL;
+    }
+
      return nameEnumerator;
  }
  
      return nameEnumerator;
  }
  
+double
+PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
+  double val = 0.0;
+  UErrorCode status = U_ZERO_ERROR;
+  int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
+  return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
+}
+
+int32_t
+PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
+                                 int32_t destCapacity, UErrorCode& error) {
+    return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
+}
+
+int32_t
+PluralRules::getSamples(const UnicodeString &keyword, double *dest,
+                        int32_t destCapacity, UErrorCode& status) {
+    return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
+}
+
+int32_t
+PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
+                                int32_t destCapacity, UBool includeUnlimited,
+                                UErrorCode& status) {
+    initSamples(status);
+    if (U_FAILURE(status)) {
+        return -1;
+    }
+    if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+
+    int32_t index = getKeywordIndex(keyword, status);
+    if (index == -1) {
+        return 0;
+    }
+
+    const int32_t LIMIT_MASK = 0x1 << 31;
+
+    if (!includeUnlimited) {
+        if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
+            return -1;
+        }
+    }
+
+    int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
+    int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
+    int32_t len = limit - start;
+    if (len <= destCapacity) {
+        destCapacity = len;
+    } else if (includeUnlimited) {
+        len = destCapacity;  // no overflow, and don't report more than we copy
+    } else {
+        status = U_BUFFER_OVERFLOW_ERROR;
+        return len;
+    }
+    for (int32_t i = 0; i < destCapacity; ++i, ++start) {
+        dest[i] = mSamples[start];
+    }
+    return len;
+}
+
  
  UBool
  PluralRules::isKeyword(const UnicodeString& keyword) const {
  
  UBool
  PluralRules::isKeyword(const UnicodeString& keyword) const {
-    if ( keyword == PLURAL_KEYWORD_OTHER ) {
+    if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
          return true;
      }
      else {
          return true;
      }
      else {
@@ -199,44 +286,42 @@ PluralRules::isKeyword(const UnicodeString& keyword) const {
  
  UnicodeString
  PluralRules::getKeywordOther() const {
  
  UnicodeString
  PluralRules::getKeywordOther() const {
-    return PLURAL_KEYWORD_OTHER;
+    return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
  }
  
  UBool
  PluralRules::operator==(const PluralRules& other) const  {
      int32_t limit;
  }
  
  UBool
  PluralRules::operator==(const PluralRules& other) const  {
      int32_t limit;
-    UBool sameList = TRUE;
      const UnicodeString *ptrKeyword;
      UErrorCode status= U_ZERO_ERROR;
  
      if ( this == &other ) {
          return TRUE;
      }
      const UnicodeString *ptrKeyword;
      UErrorCode status= U_ZERO_ERROR;
  
      if ( this == &other ) {
          return TRUE;
      }
-    StringEnumeration* myKeywordList = getKeywords(status);
-    StringEnumeration* otherKeywordList =other.getKeywords(status);
+    LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
+    LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
  
      if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
  
      if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
-        sameList = FALSE;
+        return FALSE;
      }
      }
-    else {
-        myKeywordList->reset(status);
-        while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
-            if (!other.isKeyword(*ptrKeyword)) {
-                sameList = FALSE;
-            }
-        }
-        otherKeywordList->reset(status);
-        while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
-            if (!this->isKeyword(*ptrKeyword))  {
-                sameList = FALSE;
-            }
+    myKeywordList->reset(status);
+    while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
+        if (!other.isKeyword(*ptrKeyword)) {
+            return FALSE;
          }
          }
-        delete myKeywordList;
-        delete otherKeywordList;
-        if (!sameList) {
+    }
+    otherKeywordList->reset(status);
+    while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
+        if (!this->isKeyword(*ptrKeyword)) {
              return FALSE;
          }
      }
              return FALSE;
          }
      }
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
  
      if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
          return FALSE;
  
      if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
          return FALSE;
@@ -264,7 +349,10 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode
      OrConstraint *orNode=NULL;
      RuleChain *lastChain=NULL;
  
      OrConstraint *orNode=NULL;
      RuleChain *lastChain=NULL;
  
-    UnicodeString ruleData = data.toLower();
+    if (U_FAILURE(status)) {
+        return;
+    }
+    UnicodeString ruleData = data.toLower("");
      while (ruleIndex< ruleData.length()) {
          mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
          if (U_FAILURE(status)) {
      while (ruleIndex< ruleData.length()) {
          mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
          if (U_FAILURE(status)) {
@@ -276,6 +364,7 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode
          }
          switch (type) {
          case tAnd:
          }
          switch (type) {
          case tAnd:
+            U_ASSERT(curAndConstraint != NULL);
              curAndConstraint = curAndConstraint->add();
              break;
          case tOr:
              curAndConstraint = curAndConstraint->add();
              break;
          case tOr:
@@ -293,19 +382,24 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode
              curAndConstraint = orNode->add();
              break;
          case tIs:
              curAndConstraint = orNode->add();
              break;
          case tIs:
+            U_ASSERT(curAndConstraint != NULL);
              curAndConstraint->rangeHigh=-1;
              break;
          case tNot:
              curAndConstraint->rangeHigh=-1;
              break;
          case tNot:
+            U_ASSERT(curAndConstraint != NULL);
              curAndConstraint->notIn=TRUE;
              break;
          case tIn:
              curAndConstraint->notIn=TRUE;
              break;
          case tIn:
+            U_ASSERT(curAndConstraint != NULL);
              curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
              curAndConstraint->integerOnly = TRUE;
              break;
          case tWithin:
              curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
              curAndConstraint->integerOnly = TRUE;
              break;
          case tWithin:
+            U_ASSERT(curAndConstraint != NULL);
              curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
              break;
          case tNumber:
              curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
              break;
          case tNumber:
+            U_ASSERT(curAndConstraint != NULL);
              if ( (curAndConstraint->op==AndConstraint::MOD)&&
                   (curAndConstraint->opNum == -1 ) ) {
                  curAndConstraint->opNum=getNumberValue(token);
              if ( (curAndConstraint->op==AndConstraint::MOD)&&
                   (curAndConstraint->opNum == -1 ) ) {
                  curAndConstraint->opNum=getNumberValue(token);
@@ -320,6 +414,7 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode
              }
              break;
          case tMod:
              }
              break;
          case tMod:
+            U_ASSERT(curAndConstraint != NULL);
              curAndConstraint->op=AndConstraint::MOD;
              break;
          case tKeyword:
              curAndConstraint->op=AndConstraint::MOD;
              break;
          case tKeyword:
@@ -332,6 +427,9 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode
                  }
                  ruleChain=ruleChain->next=new RuleChain();
              }
                  }
                  ruleChain=ruleChain->next=new RuleChain();
              }
+            if (ruleChain->ruleHeader != NULL) {
+                delete ruleChain->ruleHeader;
+            }
              orNode = ruleChain->ruleHeader = new OrConstraint();
              curAndConstraint = orNode->add();
              ruleChain->keyword = token;
              orNode = ruleChain->ruleHeader = new OrConstraint();
              curAndConstraint = orNode->add();
              ruleChain->keyword = token;
@@ -387,6 +485,172 @@ PluralRules::getRepeatLimit() const {
      }
  }
  
      }
  }
  
+int32_t
+PluralRules::getKeywordIndex(const UnicodeString& keyword,
+                             UErrorCode& status) const {
+    if (U_SUCCESS(status)) {
+        int32_t n = 0;
+        RuleChain* rc = mRules;
+        while (rc != NULL) {
+            if (rc->ruleHeader != NULL) {
+                if (rc->keyword == keyword) {
+                    return n;
+                }
+                ++n;
+            }
+            rc = rc->next;
+        }
+        if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
+            return n;
+        }
+    }
+    return -1;
+}
+
+typedef struct SampleRecord {
+    int32_t ruleIndex;
+    double  value;
+} SampleRecord;
+
+void
+PluralRules::initSamples(UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    Mutex lock(&pluralMutex);
+
+    if (mSamples) {
+        return;
+    }
+
+    // Note, the original design let you have multiple rules with the same keyword.  But
+    // we don't use that in our data and existing functions in this implementation don't
+    // fully support it (for example, the returned keywords is a list and not a set).
+    //
+    // So I don't support this here either.  If you ask for samples, or for all values,
+    // you will get information about the first rule with that keyword, not all rules with
+    // that keyword.
+
+    int32_t maxIndex = 0;
+    int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
+    RuleChain* rc = mRules;
+    while (rc != NULL) {
+        if (rc->ruleHeader != NULL) {
+            if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
+                otherIndex = maxIndex;
+            }
+            ++maxIndex;
+        }
+        rc = rc->next;
+    }
+    if (otherIndex == -1) {
+        ++maxIndex;
+    }
+
+    LocalMemory<int32_t> newSampleInfo;
+    if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    const int32_t LIMIT_MASK = 0x1 << 31;
+
+    rc = mRules;
+    int32_t n = 0;
+    while (rc != NULL) {
+        if (rc->ruleHeader != NULL) {
+            newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
+        }
+        rc = rc->next;
+    }
+    if (otherIndex == -1) {
+        newSampleInfo[maxIndex - 1] = 0; // unlimited
+    }
+
+    MaybeStackArray<SampleRecord, 10> newSamples;
+    int32_t sampleCount = 0;
+
+    int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
+    if (limit < 10) {
+        limit = 10;
+    }
+
+    for (int i = 0, keywordsRemaining = maxIndex;
+          keywordsRemaining > 0 && i < limit;
+          ++i) {
+        double val = i / 2.0;
+
+        n = 0;
+        rc = mRules;
+        int32_t found = -1;
+        while (rc != NULL) {
+            if (rc->ruleHeader != NULL) {
+                if (rc->ruleHeader->isFulfilled(val)) {
+                    found = n;
+                    break;
+                }
+                ++n;
+            }
+            rc = rc->next;
+        }
+        if (found == -1) {
+            // 'other'.  If there is an 'other' rule, the rule set is bad since nothing
+            // should leak through, but we don't bother to report that here.
+            found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
+        }
+        if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
+            continue;
+        }
+        newSampleInfo[found] += 1; // won't impact limit flag
+
+        if (sampleCount == newSamples.getCapacity()) {
+            int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
+            if (NULL == newSamples.resize(newCapacity, sampleCount)) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+        }
+        newSamples[sampleCount].ruleIndex = found;
+        newSamples[sampleCount].value = val;
+        ++sampleCount;
+
+        if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
+            --keywordsRemaining;
+        }
+    }
+
+    // sort the values by index, leaving order otherwise unchanged
+    // this is just a selection sort for simplicity
+    LocalMemory<double> values;
+    if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    for (int i = 0, j = 0; i < maxIndex; ++i) {
+        for (int k = 0; k < sampleCount; ++k) {
+            if (newSamples[k].ruleIndex == i) {
+                values[j++] = newSamples[k].value;
+            }
+        }
+    }
+
+    // convert array of mask/lengths to array of mask/limits
+    limit = 0;
+    for (int i = 0; i < maxIndex; ++i) {
+        int32_t info = newSampleInfo[i];
+        int32_t len = info & ~LIMIT_MASK;
+        limit += len;
+        // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
+        // it's not really unlimited, so mark it as limited
+        int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
+        newSampleInfo[i] = limit | mask;
+    }
+
+    // ok, we've got good data
+    mSamples = values.orphan();
+    mSampleInfo = newSampleInfo.orphan();
+    mSampleInfoCount = maxIndex;
+}
  
  void
  PluralRules::addRules(RuleChain& rules) {
  
  void
  PluralRules::addRules(RuleChain& rules) {
@@ -398,8 +662,10 @@ PluralRules::addRules(RuleChain& rules) {
  UnicodeString
  PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
      UnicodeString emptyStr;
  UnicodeString
  PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
      UnicodeString emptyStr;
-    
-    errCode = U_ZERO_ERROR;
+
+    if (U_FAILURE(errCode)) {
+        return emptyStr;
+    }
      UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
      if(U_FAILURE(errCode)) {
          /* total failure, not even root could be opened */
      UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
      if(U_FAILURE(errCode)) {
          /* total failure, not even root could be opened */
@@ -409,7 +675,7 @@ PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
      if(U_FAILURE(errCode)) {
          ures_close(rb);
          return emptyStr;
      if(U_FAILURE(errCode)) {
          ures_close(rb);
          return emptyStr;
-    }   
+    }
      int32_t resLen=0;
      const char *curLocaleName=locale.getName();
      const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
      int32_t resLen=0;
      const char *curLocaleName=locale.getName();
      const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
@@ -421,8 +687,8 @@ PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
          const char *curLocaleName=locale.getName();
          int32_t localeNameLen=0;
          uprv_strcpy(parentLocaleName, curLocaleName);
          const char *curLocaleName=locale.getName();
          int32_t localeNameLen=0;
          uprv_strcpy(parentLocaleName, curLocaleName);
-        
-        while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName, 
+
+        while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
                                         ULOC_FULLNAME_CAPACITY, &status)) > 0) {
              resLen=0;
              s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
                                         ULOC_FULLNAME_CAPACITY, &status)) > 0) {
              resLen=0;
              s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
@@ -438,12 +704,12 @@ PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
          ures_close(rb);
          return emptyStr;
      }
          ures_close(rb);
          return emptyStr;
      }
-    
+
      char setKey[256];
      UChar result[256];
      u_UCharsToChars(s, setKey, resLen + 1);
      // printf("\n PluralRule: %s\n", setKey);
      char setKey[256];
      UChar result[256];
      u_UCharsToChars(s, setKey, resLen + 1);
      // printf("\n PluralRule: %s\n", setKey);
-    
+
  
      UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
      if(U_FAILURE(errCode)) {
  
      UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
      if(U_FAILURE(errCode)) {
@@ -467,7 +733,7 @@ PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
          int32_t keyLen;
          resLen=0;
          s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
          int32_t keyLen;
          resLen=0;
          s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
-        keyLen = uprv_strlen(key);
+        keyLen = (int32_t)uprv_strlen(key);
          u_charsToUChars(key, result+len, keyLen);
          len += keyLen;
          result[len++]=COLON;
          u_charsToUChars(key, result+len, keyLen);
          len += keyLen;
          result[len++]=COLON;
@@ -484,7 +750,6 @@ PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
      ures_close(locRes);
      ures_close(rb);
      return UnicodeString(result);
      ures_close(locRes);
      ures_close(rb);
      return UnicodeString(result);
-    
  }
  
  AndConstraint::AndConstraint() {
  }
  
  AndConstraint::AndConstraint() {
@@ -524,7 +789,12 @@ UBool
  AndConstraint::isFulfilled(double number) {
      UBool result=TRUE;
      double value=number;
  AndConstraint::isFulfilled(double number) {
      UBool result=TRUE;
      double value=number;
-    
+
+    // arrrrrrgh
+    if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
+      return notIn;
+    }
+
      if ( op == MOD ) {
          value = (int32_t)value % opNum;
      }
      if ( op == MOD ) {
          value = (int32_t)value % opNum;
      }
@@ -567,15 +837,19 @@ AndConstraint::isFulfilled(double number) {
      }
  }
  
      }
  }
  
+UBool 
+AndConstraint::isLimited() {
+    return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
+}
+
  int32_t
  AndConstraint::updateRepeatLimit(int32_t maxLimit) {
  int32_t
  AndConstraint::updateRepeatLimit(int32_t maxLimit) {
-    
+
      if ( op == MOD ) {
          return uprv_max(opNum, maxLimit);
      }
      else {
          if ( rangeHigh == -1 ) {
      if ( op == MOD ) {
          return uprv_max(opNum, maxLimit);
      }
      else {
          if ( rangeHigh == -1 ) {
-            return(rangeLow>maxLimit? rangeLow : maxLimit);
              return uprv_max(rangeLow, maxLimit);
          }
          else{
              return uprv_max(rangeLow, maxLimit);
          }
          else{
@@ -639,7 +913,7 @@ UBool
  OrConstraint::isFulfilled(double number) {
      OrConstraint* orRule=this;
      UBool result=FALSE;
  OrConstraint::isFulfilled(double number) {
      OrConstraint* orRule=this;
      UBool result=FALSE;
-    
+
      while (orRule!=NULL && !result) {
          result=TRUE;
          AndConstraint* andRule = orRule->childNode;
      while (orRule!=NULL && !result) {
          result=TRUE;
          AndConstraint* andRule = orRule->childNode;
@@ -649,10 +923,26 @@ OrConstraint::isFulfilled(double number) {
          }
          orRule = orRule->next;
      }
          }
          orRule = orRule->next;
      }
-    
+
      return result;
  }
  
      return result;
  }
  
+UBool
+OrConstraint::isLimited() {
+    for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
+        UBool result = FALSE;
+        for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
+            if (andc->isLimited()) {
+                result = TRUE;
+                break;
+            }
+        }
+        if (result == FALSE) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
  
  RuleChain::RuleChain() {
      ruleHeader=NULL;
  
  RuleChain::RuleChain() {
      ruleHeader=NULL;
@@ -689,7 +979,7 @@ RuleChain::~RuleChain() {
  
  UnicodeString
  RuleChain::select(double number) const {
  
  UnicodeString
  RuleChain::select(double number) const {
-   
+
     if ( ruleHeader != NULL ) {
         if (ruleHeader->isFulfilled(number)) {
             return keyword;
     if ( ruleHeader != NULL ) {
         if (ruleHeader->isFulfilled(number)) {
             return keyword;
@@ -699,7 +989,7 @@ RuleChain::select(double number) const {
         return next->select(number);
     }
     else {
         return next->select(number);
     }
     else {
-       return PLURAL_KEYWORD_OTHER;
+       return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
     }
  
  }
     }
  
  }
@@ -707,7 +997,7 @@ RuleChain::select(double number) const {
  void
  RuleChain::dumpRules(UnicodeString& result) {
      UChar digitString[16];
  void
  RuleChain::dumpRules(UnicodeString& result) {
      UChar digitString[16];
-    
+
      if ( ruleHeader != NULL ) {
          result +=  keyword;
          OrConstraint* orRule=ruleHeader;
      if ( ruleHeader != NULL ) {
          result +=  keyword;
          OrConstraint* orRule=ruleHeader;
@@ -772,11 +1062,11 @@ RuleChain::dumpRules(UnicodeString& result) {
                      }
                  }
                  if ( (andRule=andRule->next) != NULL) {
                      }
                  }
                  if ( (andRule=andRule->next) != NULL) {
-                    result += PK_AND;
+                    result.append(PK_AND, 3);
                  }
              }
              if ( (orRule = orRule->next) != NULL ) {
                  }
              }
              if ( (orRule = orRule->next) != NULL ) {
-                result += PK_OR;
+                result.append(PK_OR, 2);
              }
          }
      }
              }
          }
      }
@@ -846,16 +1136,9 @@ RuleChain::isKeyword(const UnicodeString& keywordParam) const {
  
  
  RuleParser::RuleParser() {
  
  
  RuleParser::RuleParser() {
-    UErrorCode err=U_ZERO_ERROR;
-    const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
-    const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
-    idStartFilter = new UnicodeSet(idStart, err);
-    idContinueFilter = new UnicodeSet(idContinue, err);
  }
  
  RuleParser::~RuleParser() {
  }
  
  RuleParser::~RuleParser() {
-    delete idStartFilter;
-    delete idContinueFilter;
  }
  
  void
  }
  
  void
@@ -872,7 +1155,7 @@ RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &statu
          }
          break;
      case tVariableN :
          }
          break;
      case tVariableN :
-        if (curType != tIs && curType != tMod && curType != tIn && 
+        if (curType != tIs && curType != tMod && curType != tIn &&
              curType != tNot && curType != tWithin) {
              status = U_UNEXPECTED_TOKEN;
          }
              curType != tNot && curType != tWithin) {
              status = U_UNEXPECTED_TOKEN;
          }
@@ -937,6 +1220,9 @@ RuleParser::getNextToken(const UnicodeString& ruleData,
      UChar ch;
      tokenType prevType=none;
  
      UChar ch;
      tokenType prevType=none;
  
+    if (U_FAILURE(status)) {
+        return;
+    }
      while (curIndex<ruleData.length()) {
          ch = ruleData.charAt(curIndex);
          if ( !inRange(ch, type) ) {
      while (curIndex<ruleData.length()) {
          ch = ruleData.charAt(curIndex);
          if ( !inRange(ch, type) ) {
@@ -1013,6 +1299,9 @@ RuleParser::getNextToken(const UnicodeString& ruleData,
          if ( (type == tLetter)||(type == tNumber) ) {
              token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
              getKeyType(token, type, status);
          if ( (type == tLetter)||(type == tNumber) ) {
              token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
              getKeyType(token, type, status);
+            if (U_FAILURE(status)) {
+                return;
+            }
          }
          *ruleIndex = ruleData.length();
      }
          }
          *ruleIndex = ruleData.length();
      }
@@ -1055,30 +1344,33 @@ RuleParser::inRange(UChar ch, tokenType& type) {
  void
  RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
  {
  void
  RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
  {
+    if (U_FAILURE(status)) {
+        return;
+    }
      if ( keyType==tNumber) {
      }
      if ( keyType==tNumber) {
      }
-    else if (token==PK_VAR_N) {
+    else if (0 == token.compare(PK_VAR_N, 1)) {
          keyType = tVariableN;
      }
          keyType = tVariableN;
      }
-    else if (token==PK_IS) {
+    else if (0 == token.compare(PK_IS, 2)) {
          keyType = tIs;
      }
          keyType = tIs;
      }
-    else if (token==PK_AND) {
+    else if (0 == token.compare(PK_AND, 3)) {
          keyType = tAnd;
      }
          keyType = tAnd;
      }
-    else if (token==PK_IN) {
+    else if (0 == token.compare(PK_IN, 2)) {
          keyType = tIn;
      }
          keyType = tIn;
      }
-    else if (token==PK_WITHIN) {
+    else if (0 == token.compare(PK_WITHIN, 6)) {
          keyType = tWithin;
      }
          keyType = tWithin;
      }
-    else if (token==PK_NOT) {
+    else if (0 == token.compare(PK_NOT, 3)) {
          keyType = tNot;
      }
          keyType = tNot;
      }
-    else if (token==PK_MOD) {
+    else if (0 == token.compare(PK_MOD, 3)) {
          keyType = tMod;
      }
          keyType = tMod;
      }
-    else if (token==PK_OR) {
+    else if (0 == token.compare(PK_OR, 2)) {
          keyType = tOr;
      }
      else if ( isValidKeyword(token) ) {
          keyType = tOr;
      }
      else if ( isValidKeyword(token) ) {
@@ -1091,39 +1383,28 @@ RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCod
  
  UBool
  RuleParser::isValidKeyword(const UnicodeString& token) {
  
  UBool
  RuleParser::isValidKeyword(const UnicodeString& token) {
-    if ( token.length()==0 ) {
-        return FALSE;
-    }
-    if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
-        int32_t i;
-        for (i=1; i< token.length(); i++) {
-            if (idContinueFilter->contains(token.charAt(i))== FALSE) {
-                return FALSE;
-            }
-        }
-        return TRUE;
-    }
-    else {
-        return FALSE;
-    }
+    return PatternProps::isIdentifier(token.getBuffer(), token.length());
  }
  
  }
  
-PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
-fKeywordNames(status)
-{
+PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
+        : pos(0), fKeywordNames(status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fKeywordNames.setDeleter(uprv_deleteUObject);
+    UBool  addKeywordOther=TRUE;
      RuleChain *node=header;
      RuleChain *node=header;
-    UBool  addKeywordOther=true;
-    
-    pos=0;
-    fKeywordNames.removeAllElements();
      while(node!=NULL) {
          fKeywordNames.addElement(new UnicodeString(node->keyword), status);
      while(node!=NULL) {
          fKeywordNames.addElement(new UnicodeString(node->keyword), status);
-        if (node->keyword == PLURAL_KEYWORD_OTHER) {
-            addKeywordOther= false;
+        if (U_FAILURE(status)) {
+            return;
+        }
+        if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
+            addKeywordOther= FALSE;
          }
          node=node->next;
      }
          }
          node=node->next;
      }
-    
+
      if (addKeywordOther) {
          fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
      }
      if (addKeywordOther) {
          fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
      }
@@ -1148,12 +1429,6 @@ PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
  }
  
  PluralKeywordEnumeration::~PluralKeywordEnumeration() {
  }
  
  PluralKeywordEnumeration::~PluralKeywordEnumeration() {
-    UnicodeString *s;
-    for (int32_t i=0; i<fKeywordNames.size(); ++i) {
-        if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
-            delete s;
-        }
-    }
  }
  
  U_NAMESPACE_END
  }
  
  U_NAMESPACE_END