/*
**********************************************************************
* Copyright (c) 2002-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Created: October 30 2002
* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
**********************************************************************
*/
13 #include "unicode/uchar.h"
14 #include "unicode/udata.h"
15 #include "unicode/uscript.h"
22 #define INCLUDED_FROM_PROPNAME_CPP
23 #include "propname_data.h"
28 * Get the next non-ignorable ASCII character from a property name
30 * @return ((advance count for the name)<<8)|character
33 getASCIIPropertyNameChar(const char *name
) {
37 /* Ignore delimiters '-', '_', and ASCII White_Space */
39 (c
=name
[i
++])==0x2d || c
==0x5f ||
40 c
==0x20 || (0x09<=c
&& c
<=0x0d);
44 return (i
<<8)|(uint8_t)uprv_asciitolower((char)c
);
51 * Get the next non-ignorable EBCDIC character from a property name
53 * @return ((advance count for the name)<<8)|character
56 getEBCDICPropertyNameChar(const char *name
) {
60 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
62 (c
=name
[i
++])==0x60 || c
==0x6d ||
63 c
==0x40 || c
==0x05 || c
==0x15 || c
==0x25 || c
==0x0b || c
==0x0c || c
==0x0d;
67 return (i
<<8)|(uint8_t)uprv_ebcdictolower((char)c
);
74 * Unicode property names and property value names are compared "loosely".
76 * UCD.html 4.0.1 says:
77 * For all property names, property value names, and for property values for
78 * Enumerated, Binary, or Catalog properties, use the following
79 * loose matching rule:
81 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
83 * This function does just that, for (char *) name strings.
84 * It is almost identical to ucnv_compareNames() but also ignores
85 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
90 U_CAPI
int32_t U_EXPORT2
91 uprv_compareASCIIPropertyNames(const char *name1
, const char *name2
) {
95 r1
=getASCIIPropertyNameChar(name1
);
96 r2
=getASCIIPropertyNameChar(name2
);
98 /* If we reach the ends of both strings then they match */
99 if(((r1
|r2
)&0xff)==0) {
103 /* Compare the lowercased characters */
105 rc
=(r1
&0xff)-(r2
&0xff);
116 U_CAPI
int32_t U_EXPORT2
117 uprv_compareEBCDICPropertyNames(const char *name1
, const char *name2
) {
121 r1
=getEBCDICPropertyNameChar(name1
);
122 r2
=getEBCDICPropertyNameChar(name2
);
124 /* If we reach the ends of both strings then they match */
125 if(((r1
|r2
)&0xff)==0) {
129 /* Compare the lowercased characters */
131 rc
=(r1
&0xff)-(r2
&0xff);
146 int32_t PropNameData::findProperty(int32_t property
) {
147 int32_t i
=1; // valueMaps index, initially after numRanges
148 for(int32_t numRanges
=valueMaps
[0]; numRanges
>0; --numRanges
) {
149 // Read and skip the start and limit of this range.
150 int32_t start
=valueMaps
[i
];
151 int32_t limit
=valueMaps
[i
+1];
157 return i
+(property
-start
)*2;
159 i
+=(limit
-start
)*2; // Skip all entries for this range.
164 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex
, int32_t value
) {
165 if(valueMapIndex
==0) {
166 return 0; // The property does not have named values.
168 ++valueMapIndex
; // Skip the BytesTrie offset.
169 int32_t numRanges
=valueMaps
[valueMapIndex
++];
172 for(; numRanges
>0; --numRanges
) {
173 // Read and skip the start and limit of this range.
174 int32_t start
=valueMaps
[valueMapIndex
];
175 int32_t limit
=valueMaps
[valueMapIndex
+1];
181 return valueMaps
[valueMapIndex
+value
-start
];
183 valueMapIndex
+=limit
-start
; // Skip all entries for this range.
187 int32_t valuesStart
=valueMapIndex
;
188 int32_t nameGroupOffsetsStart
=valueMapIndex
+numRanges
-0x10;
190 int32_t v
=valueMaps
[valueMapIndex
];
195 return valueMaps
[nameGroupOffsetsStart
+valueMapIndex
-valuesStart
];
197 } while(++valueMapIndex
<nameGroupOffsetsStart
);
202 const char *PropNameData::getName(const char *nameGroup
, int32_t nameIndex
) {
203 int32_t numNames
=*nameGroup
++;
204 if(nameIndex
<0 || numNames
<=nameIndex
) {
207 // Skip nameIndex names.
208 for(; nameIndex
>0; --nameIndex
) {
209 nameGroup
=uprv_strchr(nameGroup
, 0)+1;
212 return NULL
; // no name (Property[Value]Aliases.txt has "n/a")
217 UBool
PropNameData::containsName(BytesTrie
&trie
, const char *name
) {
221 UStringTrieResult result
=USTRINGTRIE_NO_VALUE
;
223 while((c
=*name
++)!=0) {
224 c
=uprv_invCharToLowercaseAscii(c
);
225 // Ignore delimiters '-', '_', and ASCII White_Space.
226 if(c
==0x2d || c
==0x5f || c
==0x20 || (0x09<=c
&& c
<=0x0d)) {
229 if(!USTRINGTRIE_HAS_NEXT(result
)) {
232 result
=trie
.next((uint8_t)c
);
234 return USTRINGTRIE_HAS_VALUE(result
);
237 const char *PropNameData::getPropertyName(int32_t property
, int32_t nameChoice
) {
238 int32_t valueMapIndex
=findProperty(property
);
239 if(valueMapIndex
==0) {
240 return NULL
; // Not a known property.
242 return getName(nameGroups
+valueMaps
[valueMapIndex
], nameChoice
);
245 const char *PropNameData::getPropertyValueName(int32_t property
, int32_t value
, int32_t nameChoice
) {
246 int32_t valueMapIndex
=findProperty(property
);
247 if(valueMapIndex
==0) {
248 return NULL
; // Not a known property.
250 int32_t nameGroupOffset
=findPropertyValueNameGroup(valueMaps
[valueMapIndex
+1], value
);
251 if(nameGroupOffset
==0) {
254 return getName(nameGroups
+nameGroupOffset
, nameChoice
);
257 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset
, const char *alias
) {
258 BytesTrie
trie(bytesTries
+bytesTrieOffset
);
259 if(containsName(trie
, alias
)) {
260 return trie
.getValue();
262 return UCHAR_INVALID_CODE
;
266 int32_t PropNameData::getPropertyEnum(const char *alias
) {
267 return getPropertyOrValueEnum(0, alias
);
270 int32_t PropNameData::getPropertyValueEnum(int32_t property
, const char *alias
) {
271 int32_t valueMapIndex
=findProperty(property
);
272 if(valueMapIndex
==0) {
273 return UCHAR_INVALID_CODE
; // Not a known property.
275 valueMapIndex
=valueMaps
[valueMapIndex
+1];
276 if(valueMapIndex
==0) {
277 return UCHAR_INVALID_CODE
; // The property does not have named values.
279 // valueMapIndex is the start of the property's valueMap,
280 // where the first word is the BytesTrie offset.
281 return getPropertyOrValueEnum(valueMaps
[valueMapIndex
], alias
);
//----------------------------------------------------------------------
// Public API implementation
288 U_CAPI
const char* U_EXPORT2
289 u_getPropertyName(UProperty property
,
290 UPropertyNameChoice nameChoice
) {
292 return PropNameData::getPropertyName(property
, nameChoice
);
295 U_CAPI UProperty U_EXPORT2
296 u_getPropertyEnum(const char* alias
) {
298 return (UProperty
)PropNameData::getPropertyEnum(alias
);
301 U_CAPI
const char* U_EXPORT2
302 u_getPropertyValueName(UProperty property
,
304 UPropertyNameChoice nameChoice
) {
306 return PropNameData::getPropertyValueName(property
, value
, nameChoice
);
309 U_CAPI
int32_t U_EXPORT2
310 u_getPropertyValueEnum(UProperty property
,
313 return PropNameData::getPropertyValueEnum(property
, alias
);
316 U_CAPI
const char* U_EXPORT2
317 uscript_getName(UScriptCode scriptCode
){
318 return u_getPropertyValueName(UCHAR_SCRIPT
, scriptCode
,
319 U_LONG_PROPERTY_NAME
);
322 U_CAPI
const char* U_EXPORT2
323 uscript_getShortName(UScriptCode scriptCode
){
324 return u_getPropertyValueName(UCHAR_SCRIPT
, scriptCode
,
325 U_SHORT_PROPERTY_NAME
);