X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/4388f060552cc537e71e957d32f35e9d75a61233..a01113dcd0f39d5da295ef82785beff9ed86fe38:/icuSources/common/unames.cpp diff --git a/icuSources/common/unames.cpp b/icuSources/common/unames.cpp index 932d2024..d9f61cac 100644 --- a/icuSources/common/unames.cpp +++ b/icuSources/common/unames.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * -* Copyright (C) 1999-2011, International Business Machines +* Copyright (C) 1999-2014, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: unames.c -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -20,6 +22,7 @@ #include "unicode/udata.h" #include "unicode/utf.h" #include "unicode/utf16.h" +#include "uassert.h" #include "ustr_imp.h" #include "umutex.h" #include "cmemory.h" @@ -28,9 +31,9 @@ #include "udataswp.h" #include "uprops.h" -/* prototypes ------------------------------------------------------------- */ +U_NAMESPACE_BEGIN -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +/* prototypes ------------------------------------------------------------- */ static const char DATA_NAME[] = "unames"; static const char DATA_TYPE[] = "icu"; @@ -102,7 +105,7 @@ typedef struct { static UDataMemory *uCharNamesData=NULL; static UCharNames *uCharNames=NULL; -static UErrorCode gLoadErrorCode=U_ZERO_ERROR; +static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER; /* * Maximum length of character names (regular & 1.0). @@ -168,6 +171,7 @@ static UBool U_CALLCONV unames_cleanup(void) if(uCharNames) { uCharNames = NULL; } + gCharNamesInitOnce.reset(); gMaxNameLength=0; return TRUE; } @@ -187,52 +191,25 @@ isAcceptable(void * /*context*/, pInfo->formatVersion[0]==1); } -static UBool -isDataLoaded(UErrorCode *pErrorCode) { - /* load UCharNames from file if necessary */ - UBool isCached; - - /* do this because double-checked locking is broken */ - UMTX_CHECK(NULL, (uCharNames!=NULL), isCached); +static void U_CALLCONV +loadCharNames(UErrorCode &status) { + U_ASSERT(uCharNamesData == NULL); + U_ASSERT(uCharNames == NULL); - if(!isCached) { - UCharNames *names; - UDataMemory *data; - - /* check error code from previous attempt */ - if(U_FAILURE(gLoadErrorCode)) { - *pErrorCode=gLoadErrorCode; - return FALSE; - } - - /* open the data outside the mutex block */ - data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - gLoadErrorCode=*pErrorCode; - return FALSE; - } - - names=(UCharNames *)udata_getMemory(data); + uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status); + if(U_FAILURE(status)) { + uCharNamesData = NULL; + } else { + uCharNames = (UCharNames *)udata_getMemory(uCharNamesData); + } + ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); +} - /* in the mutex block, set the data for this process */ - { - umtx_lock(NULL); - if(uCharNames==NULL) { - uCharNamesData=data; - uCharNames=names; - data=NULL; - names=NULL; - ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); - } - umtx_unlock(NULL); - } - /* if a different thread set it first, then close the extra data */ - if(data!=NULL) { - udata_close(data); /* NULL if it was set correctly */ - } - } - return TRUE; +static UBool +isDataLoaded(UErrorCode *pErrorCode) { + umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode); + return U_SUCCESS(*pErrorCode); } #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \ @@ -461,7 +438,7 @@ static const char *getCharCatName(UChar32 cp) { /* Return unknown if the table of names above is not up to date. */ - if (cat >= LENGTHOF(charCatNames)) { + if (cat >= UPRV_LENGTHOF(charCatNames)) { return "unknown"; } else { return charCatNames[cat]; @@ -489,7 +466,7 @@ static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10); } buffer += ndigits; - length += ndigits; + length += static_cast(ndigits); WRITE_CHAR(buffer, bufferLength, length, '>'); return length; @@ -1300,7 +1277,7 @@ static int32_t calcExtNameSetsLengths(int32_t maxNameLength) { int32_t i, length; - for(i=0; i') { - for (--i; lower[i] && lower[i] != '-'; --i) { - } - - if (lower[i] == '-') { /* We've got a category. */ - uint32_t cIdx; - - lower[i] = 0; - - for (++i; lower[i] != '>'; ++i) { - if (lower[i] >= '0' && lower[i] <= '9') { - cp = (cp << 4) + lower[i] - '0'; - } else if (lower[i] >= 'a' && lower[i] <= 'f') { - cp = (cp << 4) + lower[i] - 'a' + 10; - } else { - *pErrorCode = U_ILLEGAL_CHAR_FOUND; - return error; - } + if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') { + // Parse a string like "" where HHHH is a hex code point. + uint32_t limit = i; + while (i >= 3 && lower[--i] != '-') {} + + // There should be 1 to 8 hex digits. + int32_t hexLength = limit - (i + 1); + if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) { + uint32_t cIdx; + + lower[i] = 0; + + for (++i; i < limit; ++i) { + if (lower[i] >= '0' && lower[i] <= '9') { + cp = (cp << 4) + lower[i] - '0'; + } else if (lower[i] >= 'a' && lower[i] <= 'f') { + cp = (cp << 4) + lower[i] - 'a' + 10; + } else { + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; } + // Prevent signed-integer overflow and out-of-range code points. + if (cp > UCHAR_MAX_VALUE) { + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; + } + } - /* Now validate the category name. - We could use a binary search, or a trie, if - we really wanted to. */ - - for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) { + /* Now validate the category name. + We could use a binary search, or a trie, if + we really wanted to. */ + uint8_t cat = getCharCat(cp); + for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) { - if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { - if (getCharCat(cp) == cIdx) { - return cp; - } - break; + if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { + if (cat == cIdx) { + return cp; } + break; } } }