+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
-* Copyright (C) 1999-2011, International Business Machines
+* Copyright (C) 1999-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: unames.c
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/udata.h"
#include "unicode/utf.h"
#include "unicode/utf16.h"
+#include "uassert.h"
#include "ustr_imp.h"
#include "umutex.h"
#include "cmemory.h"
#include "udataswp.h"
#include "uprops.h"
-/* prototypes ------------------------------------------------------------- */
+U_NAMESPACE_BEGIN
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+/* prototypes ------------------------------------------------------------- */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";
static UDataMemory *uCharNamesData=NULL;
static UCharNames *uCharNames=NULL;
-static UErrorCode gLoadErrorCode=U_ZERO_ERROR;
+static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;  // passed to umtx_initOnce() by isDataLoaded() to run loadCharNames()
/*
* Maximum length of character names (regular & 1.0).
if(uCharNames) {
uCharNames = NULL;
}
+ gCharNamesInitOnce.reset();
gMaxNameLength=0;
return TRUE;
}
pInfo->formatVersion[0]==1);
}
-static UBool
-isDataLoaded(UErrorCode *pErrorCode) {
- /* load UCharNames from file if necessary */
- UBool isCached;
-
- /* do this because double-checked locking is broken */
- UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);
+static void U_CALLCONV
+loadCharNames(UErrorCode &status) {  // Loads the "unames" data item into uCharNamesData/uCharNames; invoked through umtx_initOnce() from isDataLoaded().
+ U_ASSERT(uCharNamesData == NULL);  // both file-level pointers are expected to be unset on entry
+ U_ASSERT(uCharNames == NULL);
- if(!isCached) {
- UCharNames *names;
- UDataMemory *data;
-
- /* check error code from previous attempt */
- if(U_FAILURE(gLoadErrorCode)) {
- *pErrorCode=gLoadErrorCode;
- return FALSE;
- }
-
- /* open the data outside the mutex block */
- data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- gLoadErrorCode=*pErrorCode;
- return FALSE;
- }
-
- names=(UCharNames *)udata_getMemory(data);
+ uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);  // failure is reported to the caller through status
+ if(U_FAILURE(status)) {
+ uCharNamesData = NULL;  // do not keep whatever the failed open returned
+ } else {
+ uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);  // uCharNames is set only when the open succeeded
+ }
+ ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);  // registered on both the success and the failure path
+}
- /* in the mutex block, set the data for this process */
- {
- umtx_lock(NULL);
- if(uCharNames==NULL) {
- uCharNamesData=data;
- uCharNames=names;
- data=NULL;
- names=NULL;
- ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
- }
- umtx_unlock(NULL);
- }
- /* if a different thread set it first, then close the extra data */
- if(data!=NULL) {
- udata_close(data); /* NULL if it was set correctly */
- }
- }
- return TRUE;
+static UBool
+isDataLoaded(UErrorCode *pErrorCode) {  // Hands loadCharNames() to umtx_initOnce() under gCharNamesInitOnce, then reports whether *pErrorCode is a success code.
+ umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);  // NOTE(review): presumably a failure from the first load is reported again on later calls -- confirm against umtx_initOnce()
+ return U_SUCCESS(*pErrorCode);  // *pErrorCode is dereferenced unconditionally, so callers must pass a non-NULL pointer
 }
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
/* Return unknown if the table of names above is not up to
date. */
- if (cat >= LENGTHOF(charCatNames)) {
+ if (cat >= UPRV_LENGTHOF(charCatNames)) {
return "unknown";
} else {
return charCatNames[cat];
buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
}
buffer += ndigits;
- length += ndigits;
+ length += static_cast<uint16_t>(ndigits);
WRITE_CHAR(buffer, bufferLength, length, '>');
return length;
calcExtNameSetsLengths(int32_t maxNameLength) {
int32_t i, length;
- for(i=0; i<LENGTHOF(charCatNames); ++i) {
+ for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
/*
* for each category, count the length of the category name
* plus 9=
return TRUE;
}
+U_NAMESPACE_END
+
/* public API --------------------------------------------------------------- */
+U_NAMESPACE_USE
+
U_CAPI int32_t U_EXPORT2
u_charName(UChar32 code, UCharNameChoice nameChoice,
char *buffer, int32_t bufferLength,
UErrorCode *pErrorCode) {
- AlgorithmicRange *algRange;
+ AlgorithmicRange *algRange;
uint32_t *p;
uint32_t i;
int32_t length;
uint32_t i;
UChar32 cp = 0;
char c0;
- UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
+ static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return error;
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
return error;
}
+ // i==strlen(name)==strlen(lower)==strlen(upper)
/* try extended names first */
if (lower[0] == '<') {
- if (nameChoice == U_EXTENDED_CHAR_NAME) {
- if (lower[--i] == '>') {
- for (--i; lower[i] && lower[i] != '-'; --i) {
- }
-
- if (lower[i] == '-') { /* We've got a category. */
- uint32_t cIdx;
-
- lower[i] = 0;
-
- for (++i; lower[i] != '>'; ++i) {
- if (lower[i] >= '0' && lower[i] <= '9') {
- cp = (cp << 4) + lower[i] - '0';
- } else if (lower[i] >= 'a' && lower[i] <= 'f') {
- cp = (cp << 4) + lower[i] - 'a' + 10;
- } else {
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- return error;
- }
+ if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
+ // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
+ uint32_t limit = i;
+ while (i >= 3 && lower[--i] != '-') {}
+
+ // There should be 1 to 8 hex digits.
+ int32_t hexLength = limit - (i + 1);
+ if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
+ uint32_t cIdx;
+
+ lower[i] = 0;
+
+ for (++i; i < limit; ++i) {
+ if (lower[i] >= '0' && lower[i] <= '9') {
+ cp = (cp << 4) + lower[i] - '0';
+ } else if (lower[i] >= 'a' && lower[i] <= 'f') {
+ cp = (cp << 4) + lower[i] - 'a' + 10;
+ } else {
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
}
+ // Prevent signed-integer overflow and out-of-range code points.
+ if (cp > UCHAR_MAX_VALUE) {
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
+ }
+ }
- /* Now validate the category name.
- We could use a binary search, or a trie, if
- we really wanted to. */
-
- for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
+ /* Now validate the category name.
+ We could use a binary search, or a trie, if
+ we really wanted to. */
+ uint8_t cat = getCharCat(cp);
+ for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
- if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
- if (getCharCat(cp) == cIdx) {
- return cp;
- }
- break;
+ if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
+ if (cat == cIdx) {
+ return cp;
}
+ break;
}
}
}