/*
**********************************************************************
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Created: October 30 2002
* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
**********************************************************************
*/
13 #include "unicode/uchar.h"
14 #include "unicode/udata.h"
15 #include "unicode/uscript.h"
23 #define INCLUDED_FROM_PROPNAME_CPP
24 #include "propname_data.h"
29 * Get the next non-ignorable ASCII character from a property name
31 * @return ((advance count for the name)<<8)|character
34 getASCIIPropertyNameChar(const char *name
) {
38 /* Ignore delimiters '-', '_', and ASCII White_Space */
40 (c
=name
[i
++])==0x2d || c
==0x5f ||
41 c
==0x20 || (0x09<=c
&& c
<=0x0d);
45 return (i
<<8)|(uint8_t)uprv_asciitolower((char)c
);
52 * Get the next non-ignorable EBCDIC character from a property name
54 * @return ((advance count for the name)<<8)|character
57 getEBCDICPropertyNameChar(const char *name
) {
61 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
63 (c
=name
[i
++])==0x60 || c
==0x6d ||
64 c
==0x40 || c
==0x05 || c
==0x15 || c
==0x25 || c
==0x0b || c
==0x0c || c
==0x0d;
68 return (i
<<8)|(uint8_t)uprv_ebcdictolower((char)c
);
75 * Unicode property names and property value names are compared "loosely".
77 * UCD.html 4.0.1 says:
78 * For all property names, property value names, and for property values for
79 * Enumerated, Binary, or Catalog properties, use the following
80 * loose matching rule:
82 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
84 * This function does just that, for (char *) name strings.
85 * It is almost identical to ucnv_compareNames() but also ignores
86 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
91 U_CAPI
int32_t U_EXPORT2
92 uprv_compareASCIIPropertyNames(const char *name1
, const char *name2
) {
96 r1
=getASCIIPropertyNameChar(name1
);
97 r2
=getASCIIPropertyNameChar(name2
);
99 /* If we reach the ends of both strings then they match */
100 if(((r1
|r2
)&0xff)==0) {
104 /* Compare the lowercased characters */
106 rc
=(r1
&0xff)-(r2
&0xff);
117 U_CAPI
int32_t U_EXPORT2
118 uprv_compareEBCDICPropertyNames(const char *name1
, const char *name2
) {
122 r1
=getEBCDICPropertyNameChar(name1
);
123 r2
=getEBCDICPropertyNameChar(name2
);
125 /* If we reach the ends of both strings then they match */
126 if(((r1
|r2
)&0xff)==0) {
130 /* Compare the lowercased characters */
132 rc
=(r1
&0xff)-(r2
&0xff);
147 int32_t PropNameData::findProperty(int32_t property
) {
148 int32_t i
=1; // valueMaps index, initially after numRanges
149 for(int32_t numRanges
=valueMaps
[0]; numRanges
>0; --numRanges
) {
150 // Read and skip the start and limit of this range.
151 int32_t start
=valueMaps
[i
];
152 int32_t limit
=valueMaps
[i
+1];
158 return i
+(property
-start
)*2;
160 i
+=(limit
-start
)*2; // Skip all entries for this range.
165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex
, int32_t value
) {
166 if(valueMapIndex
==0) {
167 return 0; // The property does not have named values.
169 ++valueMapIndex
; // Skip the BytesTrie offset.
170 int32_t numRanges
=valueMaps
[valueMapIndex
++];
173 for(; numRanges
>0; --numRanges
) {
174 // Read and skip the start and limit of this range.
175 int32_t start
=valueMaps
[valueMapIndex
];
176 int32_t limit
=valueMaps
[valueMapIndex
+1];
182 return valueMaps
[valueMapIndex
+value
-start
];
184 valueMapIndex
+=limit
-start
; // Skip all entries for this range.
188 int32_t valuesStart
=valueMapIndex
;
189 int32_t nameGroupOffsetsStart
=valueMapIndex
+numRanges
-0x10;
191 int32_t v
=valueMaps
[valueMapIndex
];
196 return valueMaps
[nameGroupOffsetsStart
+valueMapIndex
-valuesStart
];
198 } while(++valueMapIndex
<nameGroupOffsetsStart
);
203 const char *PropNameData::getName(const char *nameGroup
, int32_t nameIndex
) {
204 int32_t numNames
=*nameGroup
++;
205 if(nameIndex
<0 || numNames
<=nameIndex
) {
208 // Skip nameIndex names.
209 for(; nameIndex
>0; --nameIndex
) {
210 nameGroup
=uprv_strchr(nameGroup
, 0)+1;
213 return NULL
; // no name (Property[Value]Aliases.txt has "n/a")
218 UBool
PropNameData::containsName(BytesTrie
&trie
, const char *name
) {
222 UStringTrieResult result
=USTRINGTRIE_NO_VALUE
;
224 while((c
=*name
++)!=0) {
225 c
=uprv_invCharToLowercaseAscii(c
);
226 // Ignore delimiters '-', '_', and ASCII White_Space.
227 if(c
==0x2d || c
==0x5f || c
==0x20 || (0x09<=c
&& c
<=0x0d)) {
230 if(!USTRINGTRIE_HAS_NEXT(result
)) {
233 result
=trie
.next((uint8_t)c
);
235 return USTRINGTRIE_HAS_VALUE(result
);
238 const char *PropNameData::getPropertyName(int32_t property
, int32_t nameChoice
) {
239 int32_t valueMapIndex
=findProperty(property
);
240 if(valueMapIndex
==0) {
241 return NULL
; // Not a known property.
243 return getName(nameGroups
+valueMaps
[valueMapIndex
], nameChoice
);
246 const char *PropNameData::getPropertyValueName(int32_t property
, int32_t value
, int32_t nameChoice
) {
247 int32_t valueMapIndex
=findProperty(property
);
248 if(valueMapIndex
==0) {
249 return NULL
; // Not a known property.
251 int32_t nameGroupOffset
=findPropertyValueNameGroup(valueMaps
[valueMapIndex
+1], value
);
252 if(nameGroupOffset
==0) {
255 return getName(nameGroups
+nameGroupOffset
, nameChoice
);
258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset
, const char *alias
) {
259 BytesTrie
trie(bytesTries
+bytesTrieOffset
);
260 if(containsName(trie
, alias
)) {
261 return trie
.getValue();
263 return UCHAR_INVALID_CODE
;
267 int32_t PropNameData::getPropertyEnum(const char *alias
) {
268 return getPropertyOrValueEnum(0, alias
);
271 int32_t PropNameData::getPropertyValueEnum(int32_t property
, const char *alias
) {
272 int32_t valueMapIndex
=findProperty(property
);
273 if(valueMapIndex
==0) {
274 return UCHAR_INVALID_CODE
; // Not a known property.
276 valueMapIndex
=valueMaps
[valueMapIndex
+1];
277 if(valueMapIndex
==0) {
278 return UCHAR_INVALID_CODE
; // The property does not have named values.
280 // valueMapIndex is the start of the property's valueMap,
281 // where the first word is the BytesTrie offset.
282 return getPropertyOrValueEnum(valueMaps
[valueMapIndex
], alias
);
//----------------------------------------------------------------------
// Public API implementation
289 U_CAPI
const char* U_EXPORT2
290 u_getPropertyName(UProperty property
,
291 UPropertyNameChoice nameChoice
) {
293 return PropNameData::getPropertyName(property
, nameChoice
);
296 U_CAPI UProperty U_EXPORT2
297 u_getPropertyEnum(const char* alias
) {
299 return (UProperty
)PropNameData::getPropertyEnum(alias
);
302 U_CAPI
const char* U_EXPORT2
303 u_getPropertyValueName(UProperty property
,
305 UPropertyNameChoice nameChoice
) {
307 return PropNameData::getPropertyValueName(property
, value
, nameChoice
);
310 U_CAPI
int32_t U_EXPORT2
311 u_getPropertyValueEnum(UProperty property
,
314 return PropNameData::getPropertyValueEnum(property
, alias
);
317 U_CAPI
const char* U_EXPORT2
318 uscript_getName(UScriptCode scriptCode
){
319 return u_getPropertyValueName(UCHAR_SCRIPT
, scriptCode
,
320 U_LONG_PROPERTY_NAME
);
323 U_CAPI
const char* U_EXPORT2
324 uscript_getShortName(UScriptCode scriptCode
){
325 return u_getPropertyValueName(UCHAR_SCRIPT
, scriptCode
,
326 U_SHORT_PROPERTY_NAME
);