ICU-62107.0.1.tar.gz

[apple/icu.git] / icuSources / common / uniset_props.cpp
diff --git a/icuSources/common/uniset_props.cpp b/icuSources/common/uniset_props.cpp

index 0a13740840b2b5fc98ac03546525996c4622ccc3..6ae6e71289b6d8b40959f0059707fee4192870a1 100644 (file)
--- a/icuSources/common/uniset_props.cpp
+++ b/icuSources/common/uniset_props.cpp
@@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
          ucase_addPropertyStarts(&sa, &status);
          break;
      case UPROPS_SRC_BIDI:
-        ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);
+        ubidi_addPropertyStarts(&sa, &status);
          break;
      default:
          status = U_INTERNAL_PROGRAM_ERROR;
@@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
      return i.fSet;
  }
  
+namespace {
  
  // Cache some sets for other services -------------------------------------- ***
  void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
@@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
  // memory leak checker tools
  #define _dbgct(me)
  
+}  // namespace
+
  //----------------------------------------------------------------
  // Constructors &c
  //----------------------------------------------------------------
@@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
      // _applyPattern calls add() etc., which set pat to empty.
      UnicodeString rebuiltPat;
      RuleCharacterIterator chars(pattern, symbols, pos);
-    applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
+    applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
      if (U_FAILURE(status)) return;
      if (chars.inVariable()) {
          // syntaxError(chars, "Extra chars in variable value");
@@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
  // Implementation: Pattern parsing
  //----------------------------------------------------------------
  
+namespace {
+
  /**
   * A small all-inline class to manage a UnicodeSet pointer.  Add
   * operator->() etc. as needed.
@@ -424,6 +429,10 @@ public:
      }
  };
  
+constexpr int32_t MAX_DEPTH = 100;
+
+}  // namespace
+
  /**
   * Parse the pattern from the given RuleCharacterIterator.  The
   * iterator is advanced over the parsed pattern.
@@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                                UnicodeString& rebuiltPat,
                                uint32_t options,
                                UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+                              int32_t depth,
                                UErrorCode& ec) {
      if (U_FAILURE(ec)) return;
+    if (depth > MAX_DEPTH) {
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
  
      // Syntax characters: [ ] ^ - & { }
  
@@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
              }
              switch (setMode) {
              case 1:
-                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
+                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
                  break;
              case 2:
                  chars.skipIgnored(opts);
@@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
  // Property set implementation
  //----------------------------------------------------------------
  
+namespace {
+
  static UBool numericValueFilter(UChar32 ch, void* context) {
      return u_getNumericValue(ch) == *(double*)context;
  }
@@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
      return uscript_hasScript(ch, *(UScriptCode*)context);
  }
  
+}  // namespace
+
  /**
   * Generic filter-based scanning code for UCD property UnicodeSets.
   */
@@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
      }
  }
  
+namespace {
+
  static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
      /* Note: we use ' ' in compiler code page */
      int32_t j = 0;
@@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
      return TRUE;
  }
  
+}  // namespace
+
  //----------------------------------------------------------------
  // Property set API
  //----------------------------------------------------------------
@@ -987,7 +1009,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
  
      UProperty p;
      int32_t v;
-    UBool mustNotBeEmpty = FALSE, invert = FALSE;
+    UBool invert = FALSE;
  
      if (value.length() > 0) {
          p = u_getPropertyEnum(pname.data());
@@ -1009,14 +1031,15 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                      p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
                      char* end;
                      double value = uprv_strtod(vname.data(), &end);
-                    v = (int32_t) value;
-                    if (v != value || v < 0 || *end != 0) {
-                        // non-integral or negative value, or trailing junk
+                    // Anything between 0 and 255 is valid even if unused.
+                    // Cast double->int only after range check.
+                    // We catch NaN here because comparing it with both 0 and 255 will be false
+                    // (as are all comparisons with NaN).
+                    if (*end != 0 || !(0 <= value && value <= 255) ||
+                            (v = (int32_t)value) != value) {
+                        // non-integral value or outside 0..255, or trailing junk
                          FAIL(ec);
                      }
-                    // If the resultant set is empty then the numeric value
-                    // was invalid.
-                    mustNotBeEmpty = TRUE;
                  } else {
                      FAIL(ec);
                  }
@@ -1115,12 +1138,6 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
          complement();
      }
  
-    if (U_SUCCESS(ec) && (mustNotBeEmpty && isEmpty())) {
-        // mustNotBeEmpty is set to true if an empty set indicates
-        // invalid input.
-        ec = U_ILLEGAL_ARGUMENT_ERROR;
-    }
-
      if (isBogus() && U_SUCCESS(ec)) {
          // We likely ran out of memory. AHHH!
          ec = U_MEMORY_ALLOCATION_ERROR;