ICU-64260.0.1.tar.gz

[apple/icu.git] / icuSources / common / unames.cpp
diff --git a/icuSources/common/unames.cpp b/icuSources/common/unames.cpp

index 932d2024968c5603e00bc87c7e1c323a69a090d4..d9f61cac1575a9acfe9f8be9e153c8a4b0db5438 100644 (file)
--- a/icuSources/common/unames.cpp
+++ b/icuSources/common/unames.cpp
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
  ******************************************************************************
  *
-*   Copyright (C) 1999-2011, International Business Machines
+*   Copyright (C) 1999-2014, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
  *   file name:  unames.c
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
  *   tab size:   8 (not used)
  *   indentation:4
  *
@@ -20,6 +22,7 @@
  #include "unicode/udata.h"
  #include "unicode/utf.h"
  #include "unicode/utf16.h"
+#include "uassert.h"
  #include "ustr_imp.h"
  #include "umutex.h"
  #include "cmemory.h"
@@ -28,9 +31,9 @@
  #include "udataswp.h"
  #include "uprops.h"
  
-/* prototypes ------------------------------------------------------------- */
+U_NAMESPACE_BEGIN
  
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+/* prototypes ------------------------------------------------------------- */
  
  static const char DATA_NAME[] = "unames";
  static const char DATA_TYPE[] = "icu";
@@ -102,7 +105,7 @@ typedef struct {
  
  static UDataMemory *uCharNamesData=NULL;
  static UCharNames *uCharNames=NULL;
-static UErrorCode gLoadErrorCode=U_ZERO_ERROR;
+static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;  // init-once state for loading the names data: passed to umtx_initOnce() in isDataLoaded(), reset in unames_cleanup()
  
  /*
   * Maximum length of character names (regular & 1.0).
@@ -168,6 +171,7 @@ static UBool U_CALLCONV unames_cleanup(void)
      if(uCharNames) {
          uCharNames = NULL;
      }
+    gCharNamesInitOnce.reset();
      gMaxNameLength=0;
      return TRUE;
  }
@@ -187,52 +191,25 @@ isAcceptable(void * /*context*/,
          pInfo->formatVersion[0]==1);
  }
  
-static UBool
-isDataLoaded(UErrorCode *pErrorCode) {
-    /* load UCharNames from file if necessary */
-    UBool isCached;
-
-    /* do this because double-checked locking is broken */
-    UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);
+static void U_CALLCONV
+loadCharNames(UErrorCode &status) {  // umtx_initOnce() callback: opens the DATA_NAME/DATA_TYPE data item and stores it in uCharNamesData/uCharNames; failure is reported through status
+    U_ASSERT(uCharNamesData == NULL);  // expected to run only while nothing is loaded
+    U_ASSERT(uCharNames == NULL);
  
-    if(!isCached) {
-        UCharNames *names;
-        UDataMemory *data;
-
-        /* check error code from previous attempt */
-        if(U_FAILURE(gLoadErrorCode)) {
-            *pErrorCode=gLoadErrorCode;
-            return FALSE;
-        }
-
-        /* open the data outside the mutex block */
-        data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
-        if(U_FAILURE(*pErrorCode)) {
-            gLoadErrorCode=*pErrorCode;
-            return FALSE;
-        }
-
-        names=(UCharNames *)udata_getMemory(data);
+    uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
+    if(U_FAILURE(status)) {
+        uCharNamesData = NULL;  // failed open: keep no data handle; uCharNames is not assigned on this path
+    } else {
+        uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);  // registered on success and failure alike; unames_cleanup() also resets gCharNamesInitOnce
+}
  
-        /* in the mutex block, set the data for this process */
-        {
-            umtx_lock(NULL);
-            if(uCharNames==NULL) {
-                uCharNamesData=data;
-                uCharNames=names;
-                data=NULL;
-                names=NULL;
-                ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
-            }
-            umtx_unlock(NULL);
-        }
  
-        /* if a different thread set it first, then close the extra data */
-        if(data!=NULL) {
-            udata_close(data); /* NULL if it was set correctly */
-        }
-    }
-    return TRUE;
+static UBool
+isDataLoaded(UErrorCode *pErrorCode) {  // pErrorCode is dereferenced unconditionally, so it must not be NULL
+    umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);  // runs loadCharNames through gCharNamesInitOnce; presumably at most once until reset - confirm against umutex.h
+    return U_SUCCESS(*pErrorCode);  // result is derived from the error code only, not from uCharNames
  }
  
  #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
@@ -461,7 +438,7 @@ static const char *getCharCatName(UChar32 cp) {
      /* Return unknown if the table of names above is not up to
         date. */
  
-    if (cat >= LENGTHOF(charCatNames)) {
+    if (cat >= UPRV_LENGTHOF(charCatNames)) {
          return "unknown";
      } else {
          return charCatNames[cat];
@@ -489,7 +466,7 @@ static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
          buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
      }
      buffer += ndigits;
-    length += ndigits;
+    length += static_cast<uint16_t>(ndigits);
      WRITE_CHAR(buffer, bufferLength, length, '>');
  
      return length;
@@ -1300,7 +1277,7 @@ static int32_t
  calcExtNameSetsLengths(int32_t maxNameLength) {
      int32_t i, length;
  
-    for(i=0; i<LENGTHOF(charCatNames); ++i) {
+    for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
          /*
           * for each category, count the length of the category name
           * plus 9=
@@ -1463,13 +1440,17 @@ calcNameSetsLengths(UErrorCode *pErrorCode) {
      return TRUE;
  }
  
+U_NAMESPACE_END
+
  /* public API --------------------------------------------------------------- */
  
+U_NAMESPACE_USE
+
  U_CAPI int32_t U_EXPORT2
  u_charName(UChar32 code, UCharNameChoice nameChoice,
             char *buffer, int32_t bufferLength,
             UErrorCode *pErrorCode) {
-    AlgorithmicRange *algRange;
+     AlgorithmicRange *algRange;
      uint32_t *p;
      uint32_t i;
      int32_t length;
@@ -1545,7 +1526,7 @@ u_charFromName(UCharNameChoice nameChoice,
      uint32_t i;
      UChar32 cp = 0;
      char c0;
-    UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
+    static constexpr UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
  
      if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
          return error;
@@ -1575,42 +1556,49 @@ u_charFromName(UCharNameChoice nameChoice,
          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
          return error;
      }
+    // i==strlen(name)==strlen(lower)==strlen(upper)
  
      /* try extended names first */
      if (lower[0] == '<') {
-        if (nameChoice == U_EXTENDED_CHAR_NAME) {
-            if (lower[--i] == '>') {
-                for (--i; lower[i] && lower[i] != '-'; --i) {
-                }
-
-                if (lower[i] == '-') { /* We've got a category. */
-                    uint32_t cIdx;
-
-                    lower[i] = 0;
-
-                    for (++i; lower[i] != '>'; ++i) {
-                        if (lower[i] >= '0' && lower[i] <= '9') {
-                            cp = (cp << 4) + lower[i] - '0';
-                        } else if (lower[i] >= 'a' && lower[i] <= 'f') {
-                            cp = (cp << 4) + lower[i] - 'a' + 10;
-                        } else {
-                            *pErrorCode = U_ILLEGAL_CHAR_FOUND;
-                            return error;
-                        }
+        if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
+            // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
+            uint32_t limit = i;
+            while (i >= 3 && lower[--i] != '-') {}
+
+            // There should be 1 to 8 hex digits.
+            int32_t hexLength = limit - (i + 1);
+            if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
+                uint32_t cIdx;
+
+                lower[i] = 0;
+
+                for (++i; i < limit; ++i) {
+                    if (lower[i] >= '0' && lower[i] <= '9') {
+                        cp = (cp << 4) + lower[i] - '0';
+                    } else if (lower[i] >= 'a' && lower[i] <= 'f') {
+                        cp = (cp << 4) + lower[i] - 'a' + 10;
+                    } else {
+                        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                        return error;
                      }
+                    // Prevent signed-integer overflow and out-of-range code points.
+                    if (cp > UCHAR_MAX_VALUE) {
+                        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                        return error;
+                    }
+                }
  
-                    /* Now validate the category name.
-                       We could use a binary search, or a trie, if
-                       we really wanted to. */
-
-                    for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
+                /* Now validate the category name.
+                   We could use a binary search, or a trie, if
+                   we really wanted to. */
+                uint8_t cat = getCharCat(cp);
+                for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
  
-                        if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
-                            if (getCharCat(cp) == cIdx) {
-                                return cp;
-                            }
-                            break;
+                    if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
+                        if (cat == cIdx) {
+                            return cp;
                          }
+                        break;
                      }
                  }
              }