1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2002-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
9 * Created: October 30 2002
11 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
12 **********************************************************************
15 #include "unicode/uchar.h"
16 #include "unicode/udata.h"
17 #include "unicode/uscript.h"
24 #define INCLUDED_FROM_PROPNAME_CPP
25 #include "propname_data.h"
30 * Get the next non-ignorable ASCII character from a property name
32 * @return ((advance count for the name)<<8)|character
// Scans `name` from its start for the next character that is not an ignorable
// delimiter. The skipped bytes are 0x2d, 0x5f, 0x20 and the range 0x09..0x0d
// (per the comment below: '-', '_', and ASCII White_Space).
// The visible return packs two values into one int32_t:
//   - bits 8 and up: i, the number of chars consumed from `name`
//     (including the returned character, because of the post-increment);
//   - low 8 bits:    the character after uprv_asciitolower().
// NOTE(review): the declarations of `i` and `c`, the loop header, and the
// condition guarding this return are not visible in this view — confirm
// against the complete file before relying on the end-of-string behavior.
35 getASCIIPropertyNameChar(const char *name
) {
39 /* Ignore delimiters '-', '_', and ASCII White_Space */
41 (c
=name
[i
++])==0x2d || c
==0x5f ||
42 c
==0x20 || (0x09<=c
&& c
<=0x0d);
46 return (i
<<8)|(uint8_t)uprv_asciitolower((char)c
);
53 * Get the next non-ignorable EBCDIC character from a property name
55 * @return ((advance count for the name)<<8)|character
// EBCDIC counterpart of the ASCII scanner: consumes chars from `name` while
// they equal one of 0x60, 0x6d, 0x40, 0x05, 0x15, 0x25, 0x0b, 0x0c, 0x0d
// (per the comment below: '-', '_', and EBCDIC White_Space).
// The visible return packs two values into one int32_t:
//   - bits 8 and up: i, the number of chars consumed from `name`;
//   - low 8 bits:    the character after uprv_ebcdictolower().
// NOTE(review): the declarations of `i` and `c`, the loop header, and the
// condition guarding this return are not visible in this view — confirm
// against the complete file.
58 getEBCDICPropertyNameChar(const char *name
) {
62 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
64 (c
=name
[i
++])==0x60 || c
==0x6d ||
65 c
==0x40 || c
==0x05 || c
==0x15 || c
==0x25 || c
==0x0b || c
==0x0c || c
==0x0d;
69 return (i
<<8)|(uint8_t)uprv_ebcdictolower((char)c
);
76 * Unicode property names and property value names are compared "loosely".
78 * UCD.html 4.0.1 says:
79 * For all property names, property value names, and for property values for
80 * Enumerated, Binary, or Catalog properties, use the following
81 * loose matching rule:
83 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
85 * This function does just that, for (char *) name strings.
86 * It is almost identical to ucnv_compareNames() but also ignores
87 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
// Loose comparison of two ASCII property (value) names.
// Each step fetches the next significant character of each name with
// getASCIIPropertyNameChar(); the low byte of each result is the lowercased
// character and the upper bits are the advance count.
// If both low bytes are zero, both names have ended and they match.
// Otherwise rc is the difference of the two low bytes.
// NOTE(review): the enclosing loop, the return statements, and the code that
// advances name1/name2 by the counts in r1/r2 are not visible in this view —
// confirm the exact return values against the complete file.
92 U_CAPI
int32_t U_EXPORT2
93 uprv_compareASCIIPropertyNames(const char *name1
, const char *name2
) {
97 r1
=getASCIIPropertyNameChar(name1
);
98 r2
=getASCIIPropertyNameChar(name2
);
100 /* If we reach the ends of both strings then they match */
101 if(((r1
|r2
)&0xff)==0) {
105 /* Compare the lowercased characters */
107 rc
=(r1
&0xff)-(r2
&0xff);
// Loose comparison of two EBCDIC property (value) names.
// Same shape as the ASCII variant, but characters are fetched with
// getEBCDICPropertyNameChar(). The low byte of each result is the lowercased
// character; when both low bytes are zero the names have ended and match.
// Otherwise rc is the difference of the two low bytes.
// NOTE(review): the enclosing loop, the return statements, and the code that
// advances name1/name2 are not visible in this view — confirm against the
// complete file.
118 U_CAPI
int32_t U_EXPORT2
119 uprv_compareEBCDICPropertyNames(const char *name1
, const char *name2
) {
123 r1
=getEBCDICPropertyNameChar(name1
);
124 r2
=getEBCDICPropertyNameChar(name2
);
126 /* If we reach the ends of both strings then they match */
127 if(((r1
|r2
)&0xff)==0) {
131 /* Compare the lowercased characters */
133 rc
=(r1
&0xff)-(r2
&0xff);
// Finds the valueMaps index of the entry for `property`.
// valueMaps[0] holds the number of ranges; each range starts with a
// (start, limit) pair read at i and i+1.
// On a hit the result is i+(property-start)*2, i.e. entries are addressed
// two int32_t words apart. Callers in this file read the word at the returned
// index as a nameGroups offset and the word at index+1 as a value-map index.
// Callers in this file also treat a return of 0 as "not a known property".
// NOTE(review): the step that advances i past the start/limit pair, the
// range tests on `property`, and the fall-through return are not visible in
// this view — confirm against the complete file.
148 int32_t PropNameData::findProperty(int32_t property
) {
149 int32_t i
=1; // valueMaps index, initially after numRanges
150 for(int32_t numRanges
=valueMaps
[0]; numRanges
>0; --numRanges
) {
151 // Read and skip the start and limit of this range.
152 int32_t start
=valueMaps
[i
];
153 int32_t limit
=valueMaps
[i
+1];
159 return i
+(property
-start
)*2;
161 i
+=(limit
-start
)*2; // Skip all entries for this range.
// Looks up the name-group offset for `value` inside the value map that
// starts at valueMaps[valueMapIndex].
// A valueMapIndex of 0 means the property has no named values; returns 0.
// Layout as read by the visible code:
//   word 0: BytesTrie offset (skipped here)
//   word 1: numRanges
//   then either ranges (start, limit, one entry per value in the range)
//   or a list of values followed by their name-group offsets.
// NOTE(review): the test that chooses between the range form and the list
// form, the comparisons on `value`, and the fall-through return are not
// visible in this view — confirm against the complete file.
166 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex
, int32_t value
) {
167 if(valueMapIndex
==0) {
168 return 0; // The property does not have named values.
170 ++valueMapIndex
; // Skip the BytesTrie offset.
171 int32_t numRanges
=valueMaps
[valueMapIndex
++];
// Range form: walk numRanges ranges of consecutive values.
174 for(; numRanges
>0; --numRanges
) {
175 // Read and skip the start and limit of this range.
176 int32_t start
=valueMaps
[valueMapIndex
];
177 int32_t limit
=valueMaps
[valueMapIndex
+1];
// Within a range the entry for `value` sits (value-start) words in.
183 return valueMaps
[valueMapIndex
+value
-start
];
185 valueMapIndex
+=limit
-start
; // Skip all entries for this range.
// List form: the name-group offsets start (numRanges-0x10) words after the
// first value, which suggests the list length is stored biased by 0x10.
// NOTE(review): confirm the bias against the data-format description.
189 int32_t valuesStart
=valueMapIndex
;
190 int32_t nameGroupOffsetsStart
=valueMapIndex
+numRanges
-0x10;
192 int32_t v
=valueMaps
[valueMapIndex
];
// The offset for the k-th listed value is the k-th word after
// nameGroupOffsetsStart.
197 return valueMaps
[nameGroupOffsetsStart
+valueMapIndex
-valuesStart
];
199 } while(++valueMapIndex
<nameGroupOffsetsStart
);
// Selects the nameIndex-th name from a name group.
// The first char of `nameGroup` is the number of names; the names follow.
// Skipping a name means finding its terminating NUL with uprv_strchr(.., 0)
// and stepping one past it, so the names are stored back to back as
// NUL-terminated strings.
// An out-of-range nameIndex (negative or >= numNames) takes the early-exit
// branch; a slot with no name yields NULL (see the "n/a" comment below).
// NOTE(review): the body of the range-check branch, the emptiness test before
// `return NULL`, and the final return of the found name are not visible in
// this view — confirm against the complete file.
204 const char *PropNameData::getName(const char *nameGroup
, int32_t nameIndex
) {
205 int32_t numNames
=*nameGroup
++;
206 if(nameIndex
<0 || numNames
<=nameIndex
) {
209 // Skip nameIndex names.
210 for(; nameIndex
>0; --nameIndex
) {
211 nameGroup
=uprv_strchr(nameGroup
, 0)+1;
214 return NULL
; // no name (Property[Value]Aliases.txt has "n/a")
// Walks `trie` with the loosely-normalized characters of `name` and reports
// whether the walk ends on a value.
// Each char is first passed through uprv_invCharToLowercaseAscii(); the bytes
// 0x2d, 0x5f, 0x20 and 0x09..0x0d ('-', '_', ASCII White_Space) are then
// tested as ignorable delimiters. Remaining chars are fed to trie.next().
// `result` starts as USTRINGTRIE_NO_VALUE and is updated by each trie.next();
// the final answer is USTRINGTRIE_HAS_VALUE(result).
// The trie is advanced in place, so the caller can read the matched value
// from it afterwards (getPropertyOrValueEnum() in this file does so).
// NOTE(review): the declaration of `c`, any guard on `name`, and the bodies
// of the two `if` branches are not visible in this view — confirm against
// the complete file.
219 UBool
PropNameData::containsName(BytesTrie
&trie
, const char *name
) {
223 UStringTrieResult result
=USTRINGTRIE_NO_VALUE
;
225 while((c
=*name
++)!=0) {
226 c
=uprv_invCharToLowercaseAscii(c
);
227 // Ignore delimiters '-', '_', and ASCII White_Space.
228 if(c
==0x2d || c
==0x5f || c
==0x20 || (0x09<=c
&& c
<=0x0d)) {
231 if(!USTRINGTRIE_HAS_NEXT(result
)) {
234 result
=trie
.next((uint8_t)c
);
236 return USTRINGTRIE_HAS_VALUE(result
);
// Returns the name of `property` selected by `nameChoice`.
// findProperty() locates the property's valueMaps entry; 0 means the
// property is unknown and NULL is returned.
// The word at that index is used as an offset into nameGroups, and
// getName() picks the nameChoice-th name from that group.
239 const char *PropNameData::getPropertyName(int32_t property
, int32_t nameChoice
) {
240 int32_t valueMapIndex
=findProperty(property
);
241 if(valueMapIndex
==0) {
242 return NULL
; // Not a known property.
244 return getName(nameGroups
+valueMaps
[valueMapIndex
], nameChoice
);
// Returns the name of `value` for `property`, selected by `nameChoice`.
// findProperty() locates the property's valueMaps entry; 0 means the
// property is unknown and NULL is returned.
// The word after that entry (index+1) is the property's value-map index,
// which findPropertyValueNameGroup() resolves to a nameGroups offset.
// getName() then picks the nameChoice-th name from that group.
// NOTE(review): the body of the `nameGroupOffset==0` branch is not visible in
// this view — confirm against the complete file.
247 const char *PropNameData::getPropertyValueName(int32_t property
, int32_t value
, int32_t nameChoice
) {
248 int32_t valueMapIndex
=findProperty(property
);
249 if(valueMapIndex
==0) {
250 return NULL
; // Not a known property.
252 int32_t nameGroupOffset
=findPropertyValueNameGroup(valueMaps
[valueMapIndex
+1], value
);
253 if(nameGroupOffset
==0) {
256 return getName(nameGroups
+nameGroupOffset
, nameChoice
);
// Maps `alias` to its enum constant using the BytesTrie that starts at
// bytesTries+bytesTrieOffset.
// containsName() advances the trie along the loosely-normalized alias; on a
// match the trie's current value is returned, otherwise UCHAR_INVALID_CODE.
259 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset
, const char *alias
) {
260 BytesTrie
trie(bytesTries
+bytesTrieOffset
);
261 if(containsName(trie
, alias
)) {
262 return trie
.getValue();
264 return UCHAR_INVALID_CODE
;
// Maps a property alias to its property enum constant by looking it up in
// the trie at offset 0 of bytesTries.
// Returns whatever getPropertyOrValueEnum() returns (UCHAR_INVALID_CODE when
// the alias is not found).
268 int32_t PropNameData::getPropertyEnum(const char *alias
) {
269 return getPropertyOrValueEnum(0, alias
);
// Maps a value alias of `property` to its value enum constant.
// Returns UCHAR_INVALID_CODE when the property is unknown (findProperty()
// returned 0) or when the property has no named values (its value-map index,
// the word after the property's valueMaps entry, is 0).
// Otherwise the first word of the value map is the BytesTrie offset that is
// handed to getPropertyOrValueEnum() together with `alias`.
272 int32_t PropNameData::getPropertyValueEnum(int32_t property
, const char *alias
) {
273 int32_t valueMapIndex
=findProperty(property
);
274 if(valueMapIndex
==0) {
275 return UCHAR_INVALID_CODE
; // Not a known property.
277 valueMapIndex
=valueMaps
[valueMapIndex
+1];
278 if(valueMapIndex
==0) {
279 return UCHAR_INVALID_CODE
; // The property does not have named values.
281 // valueMapIndex is the start of the property's valueMap,
282 // where the first word is the BytesTrie offset.
283 return getPropertyOrValueEnum(valueMaps
[valueMapIndex
], alias
);
287 //----------------------------------------------------------------------
288 // Public API implementation
// Public C entry point: returns the name of `property` selected by
// `nameChoice`, by forwarding to PropNameData::getPropertyName().
// NOTE(review): one original line between the signature and the return is
// not visible in this view — confirm there is no additional guard.
290 U_CAPI
const char* U_EXPORT2
291 u_getPropertyName(UProperty property
,
292 UPropertyNameChoice nameChoice
) {
294 return PropNameData::getPropertyName(property
, nameChoice
);
// Public C entry point: maps a property alias to a UProperty constant by
// forwarding to PropNameData::getPropertyEnum() and casting the int32_t
// result to UProperty.
// NOTE(review): one original line between the signature and the return is
// not visible in this view — confirm there is no additional guard.
297 U_CAPI UProperty U_EXPORT2
298 u_getPropertyEnum(const char* alias
) {
300 return (UProperty
)PropNameData::getPropertyEnum(alias
);
// Public C entry point: returns the name of `value` for `property` selected
// by `nameChoice`, by forwarding to PropNameData::getPropertyValueName().
// NOTE(review): the parameter line that declares `value` is not visible in
// this view; the call below shows it is passed between `property` and
// `nameChoice`.
303 U_CAPI
const char* U_EXPORT2
304 u_getPropertyValueName(UProperty property
,
306 UPropertyNameChoice nameChoice
) {
308 return PropNameData::getPropertyValueName(property
, value
, nameChoice
);
// Public C entry point: maps a value alias of `property` to its enum
// constant by forwarding to PropNameData::getPropertyValueEnum().
// NOTE(review): the parameter line that declares `alias` and the opening of
// the body are not visible in this view.
311 U_CAPI
int32_t U_EXPORT2
312 u_getPropertyValueEnum(UProperty property
,
315 return PropNameData::getPropertyValueEnum(property
, alias
);
// Returns the long property-value name of `scriptCode` for the UCHAR_SCRIPT
// property, via u_getPropertyValueName() with U_LONG_PROPERTY_NAME.
318 U_CAPI
const char* U_EXPORT2
319 uscript_getName(UScriptCode scriptCode
){
320 return u_getPropertyValueName(UCHAR_SCRIPT
, scriptCode
,
321 U_LONG_PROPERTY_NAME
);
// Returns the short property-value name of `scriptCode` for the UCHAR_SCRIPT
// property, via u_getPropertyValueName() with U_SHORT_PROPERTY_NAME.
324 U_CAPI
const char* U_EXPORT2
325 uscript_getShortName(UScriptCode scriptCode
){
326 return u_getPropertyValueName(UCHAR_SCRIPT
, scriptCode
,
327 U_SHORT_PROPERTY_NAME
);