+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
-* Copyright (c) 2002-2003, International Business Machines
+* Copyright (c) 2002-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
+* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
**********************************************************************
*/
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
+#include "unicode/uscript.h"
#include "umutex.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uarrsort.h"
+#include "uinvchar.h"
-U_NAMESPACE_BEGIN
+#define INCLUDED_FROM_PROPNAME_CPP
+#include "propname_data.h"
-//----------------------------------------------------------------------
-// PropertyAliases implementation
+U_CDECL_BEGIN
-const char*
-PropertyAliases::chooseNameInGroup(Offset offset,
- UPropertyNameChoice choice) const {
- int32_t c = choice;
- if (!offset || c < 0) {
- return NULL;
+/**
+ * Scans a property name for its next significant ASCII character,
+ * skipping the delimiters '-' and '_' and ASCII White_Space,
+ * and lowercases that character.
+ * @param name NUL-terminated name, positioned at the next unread character
+ * @return ((advance count for the name)<<8)|character;
+ *         the character byte is 0 when the terminating NUL was reached
+ */
+static inline int32_t
+getASCIIPropertyNameChar(const char *name) {
+    int32_t consumed=0;
+    char ch;
+
+    /* Step past ignorable characters; the count includes the character that ends the scan. */
+    do {
+        ch=name[consumed++];
+    } while(ch==0x2d /* '-' */ || ch==0x5f /* '_' */ ||
+            ch==0x20 /* ' ' */ || (0x09<=ch && ch<=0x0d) /* TAB..CR */);
+
+    if(ch==0) {
+        /* End of the name: only the advance count is reported. */
+        return consumed<<8;
 }
- const Offset* p = (const Offset*) getPointer(offset);
- while (c-- > 0) {
- if (*p++ < 0) return NULL;
+    return (consumed<<8)|(uint8_t)uprv_asciitolower((char)ch);
+}
+
+/**
+ * Scans a property name for its next significant EBCDIC character,
+ * skipping the delimiters '-' and '_' and EBCDIC White_Space,
+ * and lowercases that character.
+ * @param name NUL-terminated name, positioned at the next unread character
+ * @return ((advance count for the name)<<8)|character;
+ *         the character byte is 0 when the terminating NUL was reached
+ */
+static inline int32_t
+getEBCDICPropertyNameChar(const char *name) {
+    int32_t consumed=0;
+    char ch;
+
+    /* Step past ignorable characters; the count includes the character that ends the scan. */
+    do {
+        ch=name[consumed++];
+    } while(ch==0x60 || ch==0x6d ||                 /* delimiters '-', '_' */
+            ch==0x40 || ch==0x05 || ch==0x15 ||     /* EBCDIC White_Space */
+            ch==0x25 || ch==0x0b || ch==0x0c || ch==0x0d);
+
+    if(ch==0) {
+        /* End of the name: only the advance count is reported. */
+        return consumed<<8;
 }
- Offset a = *p;
- if (a < 0) a = -a;
- return (const char*) getPointerNull(a);
+    return (consumed<<8)|(uint8_t)uprv_ebcdictolower((char)ch);
}
-const ValueMap*
-PropertyAliases::getValueMap(EnumValue prop) const {
- NonContiguousEnumToOffset* e2o = (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
- Offset a = e2o->getOffset(prop);
- return (const ValueMap*) (a ? getPointerNull(a) : NULL);
+/**
+ * Loose comparison of two ASCII property names or property value names.
+ *
+ * UCD.html 4.0.1 specifies the following loose matching rule for all
+ * property names, property value names, and property values of
+ * Enumerated, Binary, or Catalog properties:
+ *
+ *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
+ *
+ * This function applies that rule to (char *) name strings.
+ * It is almost identical to ucnv_compareNames() but also ignores
+ * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
+ *
+ * @param name1 first NUL-terminated name
+ * @param name2 second NUL-terminated name
+ * @return 0 if the names match loosely; otherwise the difference between
+ *         the first pair of differing lowercased characters
+ *         (name1's character minus name2's character)
+ * @internal
+ */
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
+    int32_t next1;
+    int32_t next2;
+
+    /* Each pass consumes one significant character from each name. */
+    for(;; name1+=next1>>8, name2+=next2>>8) {
+        next1=getASCIIPropertyNameChar(name1);
+        next2=getASCIIPropertyNameChar(name2);
+
+        /* The low byte carries the lowercased character, 0 at the end of a name. */
+        const int32_t c1=next1&0xff;
+        const int32_t c2=next2&0xff;
+
+        /* Both names ended at the same time: they match. */
+        if((c1|c2)==0) {
+            return 0;
+        }
+
+        /* The advance counts may differ; only the characters decide the result. */
+        if(c1!=c2) {
+            return c1-c2;
+        }
+    }
}
-inline const char*
-PropertyAliases::getPropertyName(EnumValue prop,
- UPropertyNameChoice choice) const {
- NonContiguousEnumToOffset* e2n = (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
- return chooseNameInGroup(e2n->getOffset(prop), choice);
+/**
+ * Loose comparison of two EBCDIC property names or property value names:
+ * the characters skipped by getEBCDICPropertyNameChar() are ignored and
+ * the remaining characters are compared after lowercasing.
+ *
+ * @param name1 first NUL-terminated name
+ * @param name2 second NUL-terminated name
+ * @return 0 if the names match loosely; otherwise the difference between
+ *         the first pair of differing lowercased characters
+ *         (name1's character minus name2's character)
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
+    int32_t next1;
+    int32_t next2;
+
+    /* Each pass consumes one significant character from each name. */
+    for(;; name1+=next1>>8, name2+=next2>>8) {
+        next1=getEBCDICPropertyNameChar(name1);
+        next2=getEBCDICPropertyNameChar(name2);
+
+        /* The low byte carries the lowercased character, 0 at the end of a name. */
+        const int32_t c1=next1&0xff;
+        const int32_t c2=next2&0xff;
+
+        /* Both names ended at the same time: they match. */
+        if((c1|c2)==0) {
+            return 0;
+        }
+
+        /* The advance counts may differ; only the characters decide the result. */
+        if(c1!=c2) {
+            return c1-c2;
+        }
+    }
}
-inline EnumValue
-PropertyAliases::getPropertyEnum(const char* alias) const {
- NameToEnum* n2e = (NameToEnum*) getPointer(nameToEnum_offset);
- return n2e->getEnum(alias, *this);
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Locates a property's entry in valueMaps.
+ *
+ * valueMaps[0] holds the number of property ranges. Each range is stored as
+ * its start and limit followed by two words per property in [start, limit).
+ *
+ * @param property the property enum value to look up
+ * @return the valueMaps index of the property's two-word entry,
+ *         or 0 if the property is not in any range
+ */
+int32_t PropNameData::findProperty(int32_t property) {
+    int32_t pos=1;  // read position in valueMaps, just after the range count
+    int32_t rangesLeft=valueMaps[0];
+    while(rangesLeft-- > 0) {
+        // Consume this range's header.
+        const int32_t start=valueMaps[pos++];
+        const int32_t limit=valueMaps[pos++];
+        if(property<start) {
+            // The search gives up at the first range that starts above the property.
+            break;
+        }
+        if(property<limit) {
+            return pos+(property-start)*2;
+        }
+        pos+=(limit-start)*2;  // Jump over the entries of this range.
+    }
+    return 0;
}
-inline const char*
-PropertyAliases::getPropertyValueName(EnumValue prop,
- EnumValue value,
- UPropertyNameChoice choice) const {
- const ValueMap* vm = getValueMap(prop);
- if (!vm) return NULL;
- Offset a;
- if (vm->enumToName_offset) {
- a = ((EnumToOffset*) getPointer(vm->enumToName_offset))->
- getOffset(value);
+/**
+ * Finds the name group offset for one value of a property.
+ *
+ * The value map starts with a BytesTrie offset, followed by a count word.
+ * A count below 0x10 is a number of value ranges (start, limit, then one
+ * name group offset per value). Otherwise (count-0x10) is the length of a
+ * list of values, which is followed by a parallel list of name group offsets.
+ *
+ * @param valueMapIndex valueMaps index of the property's value map,
+ *                      or 0 if the property has no named values
+ * @param value the property value to look up
+ * @return the name group offset read from valueMaps, or 0 if none was found
+ */
+int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
+    if(valueMapIndex==0) {
+        return 0;  // The property does not have named values.
+    }
+    int32_t pos=valueMapIndex+1;  // Step over the BytesTrie offset.
+    const int32_t count=valueMaps[pos++];
+    if(count<0x10) {
+        // Ranges of values.
+        int32_t rangesLeft=count;
+        while(rangesLeft-- > 0) {
+            // Consume this range's header.
+            const int32_t start=valueMaps[pos++];
+            const int32_t limit=valueMaps[pos++];
+            if(value<start) {
+                break;
+            }
+            if(value<limit) {
+                return valueMaps[pos+value-start];
+            }
+            pos+=limit-start;  // Jump over the entries of this range.
+        }
 } else {
- a = ((NonContiguousEnumToOffset*) getPointer(vm->ncEnumToName_offset))->
- getOffset(value);
+        // List of values, followed by a same-length list of name group offsets.
+        const int32_t valuesStart=pos;
+        const int32_t offsetsStart=pos+count-0x10;
+        for(;;) {
+            const int32_t candidate=valueMaps[pos];
+            if(value<candidate) {
+                break;
+            }
+            if(value==candidate) {
+                return valueMaps[offsetsStart+pos-valuesStart];
+            }
+            if(++pos>=offsetsStart) {
+                break;  // Reached the end of the values list.
+            }
+        }
 }
- return chooseNameInGroup(a, choice);
+    return 0;
}
-inline EnumValue
-PropertyAliases::getPropertyValueEnum(EnumValue prop,
- const char* alias) const {
- const ValueMap* vm = getValueMap(prop);
- if (!vm) return UCHAR_INVALID_CODE;
- NameToEnum* n2e = (NameToEnum*) getPointer(vm->nameToEnum_offset);
- return n2e->getEnum(alias, *this);
+/**
+ * Picks one name out of a name group.
+ *
+ * A name group is a count byte followed by that many NUL-terminated names
+ * stored back to back.
+ *
+ * @param nameGroup pointer to the count byte of the name group
+ * @param nameIndex zero-based index of the requested name
+ * @return the requested name, or NULL if nameIndex is out of range
+ *         or the name at that index is empty
+ */
+const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
+    const int32_t numNames=*nameGroup;
+    if(nameIndex<0 || nameIndex>=numNames) {
+        return NULL;
+    }
+    // Walk past the count byte and then past nameIndex names.
+    const char *s=nameGroup+1;
+    while(nameIndex-- > 0) {
+        s=uprv_strchr(s, 0)+1;
+    }
+    // An empty string means no name (Property[Value]Aliases.txt has "n/a").
+    return *s!=0 ? s : NULL;
}
-U_NAMESPACE_END
+/**
+ * Feeds a name into a BytesTrie, lowercased and with the delimiters
+ * '-', '_' and ASCII White_Space skipped.
+ *
+ * @param trie the trie to traverse; its state is advanced by this call
+ * @param name NUL-terminated name to look up, may be NULL
+ * @return whether the last trie result has a value; FALSE if name is NULL
+ *         or the trie cannot continue before the name is exhausted
+ */
+UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
+    if(name==NULL) {
+        return FALSE;
+    }
+    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
+    for(const char *p=name; *p!=0; ++p) {
+        const char c=uprv_invCharToLowercaseAscii(*p);
+        // Skip delimiters '-', '_', and ASCII White_Space.
+        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
+            continue;
+        }
+        // A significant character remains but the trie has nowhere to go.
+        if(!USTRINGTRIE_HAS_NEXT(result)) {
+            return FALSE;
+        }
+        result=trie.next((uint8_t)c);
+    }
+    return USTRINGTRIE_HAS_VALUE(result);
+}
-//----------------------------------------------------------------------
-// UDataMemory structures
+/**
+ * Returns a name of a property.
+ *
+ * @param property the property enum value
+ * @param nameChoice index of the name within the property's name group
+ * @return the name, or NULL if the property is unknown or getName() finds no such name
+ */
+const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
+    const int32_t entryIndex=findProperty(property);
+    // findProperty() returns 0 for an unknown property.
+    // The first word of the property's entry is its name group offset.
+    return entryIndex!=0 ? getName(nameGroups+valueMaps[entryIndex], nameChoice) : NULL;
+}
-static const PropertyAliases* PNAME = NULL;
-static UDataMemory* UDATA = NULL;
+/**
+ * Returns a name of a property value.
+ *
+ * @param property the property enum value
+ * @param value the property value
+ * @param nameChoice index of the name within the value's name group
+ * @return the name, or NULL if the property is unknown, no name group is
+ *         found for the value, or getName() finds no such name
+ */
+const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
+    const int32_t entryIndex=findProperty(property);
+    if(entryIndex!=0) {
+        // The second word of the property's entry is the index of its value map.
+        const int32_t groupOffset=findPropertyValueNameGroup(valueMaps[entryIndex+1], value);
+        if(groupOffset!=0) {
+            return getName(nameGroups+groupOffset, nameChoice);
+        }
+    }
+    return NULL;  // Unknown property, or no name group for this value.
+}
-//----------------------------------------------------------------------
-// UDataMemory loading/unloading
+/**
+ * Looks up a name in the BytesTrie that starts at the given offset in bytesTries.
+ *
+ * @param bytesTrieOffset offset of the trie within bytesTries
+ * @param alias the name to look up, matched as done by containsName()
+ * @return the trie's value for the name, or UCHAR_INVALID_CODE if the name is not found
+ */
+int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
+    BytesTrie trie(bytesTries+bytesTrieOffset);
+    if(!containsName(trie, alias)) {
+        return UCHAR_INVALID_CODE;
+    }
+    // containsName() left the trie positioned on the matched name.
+    return trie.getValue();
+}
-/**
- * udata callback to verify the zone data.
- */
-U_CDECL_BEGIN
-static UBool U_CALLCONV
-isAcceptable(void* /*context*/,
- const char* /*type*/, const char* /*name*/,
- const UDataInfo* info) {
- return
- info->size >= sizeof(UDataInfo) &&
- info->isBigEndian == U_IS_BIG_ENDIAN &&
- info->charsetFamily == U_CHARSET_FAMILY &&
- info->dataFormat[0] == PNAME_SIG_0 &&
- info->dataFormat[1] == PNAME_SIG_1 &&
- info->dataFormat[2] == PNAME_SIG_2 &&
- info->dataFormat[3] == PNAME_SIG_3 &&
- info->formatVersion[0] == PNAME_FORMAT_VERSION;
-}
-
-UBool
-pname_cleanup() {
- if (UDATA) {
- udata_close(UDATA);
- UDATA = NULL;
- }
- PNAME = NULL;
- return TRUE;
+/**
+ * Maps a property name to its enum value.
+ *
+ * @param alias the property name to look up
+ * @return the result of getPropertyOrValueEnum() for the trie at offset 0
+ */
+int32_t PropNameData::getPropertyEnum(const char *alias) {
+    // Property names are looked up in the trie at the very start of bytesTries.
+    const int32_t propertyNamesTrieOffset=0;
+    return getPropertyOrValueEnum(propertyNamesTrieOffset, alias);
}
-U_CDECL_END
-/**
- * Load the property names data. Caller should check that data is
- * not loaded BEFORE calling this function. Returns TRUE if the load
- * succeeds.
- */
-static UBool _load() {
- UErrorCode ec = U_ZERO_ERROR;
- UDataMemory* data =
- udata_openChoice(0, PNAME_DATA_TYPE, PNAME_DATA_NAME,
- isAcceptable, 0, &ec);
- if (U_SUCCESS(ec)) {
- umtx_lock(NULL);
- if (UDATA == NULL) {
- UDATA = data;
- PNAME = (const PropertyAliases*) udata_getMemory(UDATA);
- data = NULL;
- }
- umtx_unlock(NULL);
+/**
+ * Maps a property value name to its enum value.
+ *
+ * @param property the property enum value whose value names are searched
+ * @param alias the value name to look up
+ * @return the result of getPropertyOrValueEnum() for the property's trie,
+ *         or UCHAR_INVALID_CODE if the property is unknown or has no named values
+ */
+int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
+    const int32_t entryIndex=findProperty(property);
+    if(entryIndex==0) {
+        return UCHAR_INVALID_CODE;  // Not a known property.
 }
- if (data) {
- udata_close(data);
+    // The second word of the property's entry is the index of its value map.
+    const int32_t valueMapStart=valueMaps[entryIndex+1];
+    if(valueMapStart==0) {
+        return UCHAR_INVALID_CODE;  // The property does not have named values.
 }
- return PNAME!=NULL;
-}
-
-/**
- * Inline function that expands to code that does a lazy load of the
- * property names data. If the data is already loaded, avoids an
- * unnecessary function call. If the data is not loaded, call _load()
- * to load it, and return TRUE if the load succeeds.
- */
-static inline UBool load() {
- umtx_lock(NULL);
- UBool f = (PNAME!=NULL);
- umtx_unlock(NULL);
- return f || _load();
+    // The first word of the value map is the BytesTrie offset.
+    return getPropertyOrValueEnum(valueMaps[valueMapStart], alias);
}
+U_NAMESPACE_END
//----------------------------------------------------------------------
// Public API implementation
-// The C API is just a thin wrapper. Each function obtains a pointer
-// to the singleton PropertyAliases, and calls the appropriate method
-// on it. If it cannot obtain a pointer, because valid data is not
-// available, then it returns NULL or UCHAR_INVALID_CODE.
-
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
 UPropertyNameChoice nameChoice) {
- return load() ? PNAME->getPropertyName(property, nameChoice)
- : NULL;
+ U_NAMESPACE_USE  // NOTE(review): presumably makes the ICU C++ namespace visible here; confirm in the macro's definition
+ return PropNameData::getPropertyName(property, nameChoice);  // C API forwards to the static lookup; NULL result is passed through
}
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias) {
- UProperty p = load() ? (UProperty) PNAME->getPropertyEnum(alias)
- : UCHAR_INVALID_CODE;
- return p;
+ U_NAMESPACE_USE  // NOTE(review): presumably makes the ICU C++ namespace visible here; confirm in the macro's definition
+ return (UProperty)PropNameData::getPropertyEnum(alias);  // the int32_t lookup result is cast back to the public enum type
}
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
 int32_t value,
 UPropertyNameChoice nameChoice) {
- return load() ? PNAME->getPropertyValueName(property, value, nameChoice)
- : NULL;
+ U_NAMESPACE_USE  // NOTE(review): presumably makes the ICU C++ namespace visible here; confirm in the macro's definition
+ return PropNameData::getPropertyValueName(property, value, nameChoice);  // C API forwards to the static lookup; NULL result is passed through
}
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
 const char* alias) {
- return load() ? PNAME->getPropertyValueEnum(property, alias)
- : UCHAR_INVALID_CODE;
+ U_NAMESPACE_USE  // NOTE(review): presumably makes the ICU C++ namespace visible here; confirm in the macro's definition
+ return PropNameData::getPropertyValueEnum(property, alias);  // C API forwards to the static lookup; UCHAR_INVALID_CODE is passed through
}
-//eof
+/**
+ * Returns the long property value name of a script code.
+ *
+ * @param scriptCode the script code, used as a value of UCHAR_SCRIPT
+ * @return the result of u_getPropertyValueName() with U_LONG_PROPERTY_NAME
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode) {
+    // A script code is looked up as a value of the Script property.
+    const int32_t scriptValue=scriptCode;
+    return u_getPropertyValueName(UCHAR_SCRIPT, scriptValue, U_LONG_PROPERTY_NAME);
+}
+
+/**
+ * Returns the short property value name of a script code.
+ *
+ * @param scriptCode the script code, used as a value of UCHAR_SCRIPT
+ * @return the result of u_getPropertyValueName() with U_SHORT_PROPERTY_NAME
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode) {
+    // A script code is looked up as a value of the Script property.
+    const int32_t scriptValue=scriptCode;
+    return u_getPropertyValueName(UCHAR_SCRIPT, scriptValue, U_SHORT_PROPERTY_NAME);
+}