]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/propname.cpp
ICU-57165.0.1.tar.gz
[apple/icu.git] / icuSources / common / propname.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
b331163b 3* Copyright (c) 2002-2014, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: October 30 2002
8* Since: ICU 2.4
4388f060 9* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
b75a7d8f
A
10**********************************************************************
11*/
12#include "propname.h"
13#include "unicode/uchar.h"
14#include "unicode/udata.h"
4388f060 15#include "unicode/uscript.h"
b75a7d8f 16#include "umutex.h"
374ca955
A
17#include "cmemory.h"
18#include "cstring.h"
374ca955 19#include "uarrsort.h"
4388f060
A
20#include "uinvchar.h"
21
22#define INCLUDED_FROM_PROPNAME_CPP
23#include "propname_data.h"
374ca955
A
24
25U_CDECL_BEGIN
26
27/**
28 * Get the next non-ignorable ASCII character from a property name
29 * and lowercases it.
30 * @return ((advance count for the name)<<8)|character
31 */
32static inline int32_t
33getASCIIPropertyNameChar(const char *name) {
34 int32_t i;
35 char c;
36
37 /* Ignore delimiters '-', '_', and ASCII White_Space */
38 for(i=0;
39 (c=name[i++])==0x2d || c==0x5f ||
40 c==0x20 || (0x09<=c && c<=0x0d);
41 ) {}
42
43 if(c!=0) {
44 return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
45 } else {
46 return i<<8;
47 }
48}
49
50/**
51 * Get the next non-ignorable EBCDIC character from a property name
52 * and lowercases it.
53 * @return ((advance count for the name)<<8)|character
54 */
55static inline int32_t
56getEBCDICPropertyNameChar(const char *name) {
57 int32_t i;
58 char c;
59
60 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
61 for(i=0;
62 (c=name[i++])==0x60 || c==0x6d ||
63 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
64 ) {}
65
66 if(c!=0) {
67 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
68 } else {
69 return i<<8;
70 }
71}
72
73/**
74 * Unicode property names and property value names are compared "loosely".
75 *
76 * UCD.html 4.0.1 says:
77 * For all property names, property value names, and for property values for
78 * Enumerated, Binary, or Catalog properties, use the following
79 * loose matching rule:
80 *
81 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
82 *
83 * This function does just that, for (char *) name strings.
84 * It is almost identical to ucnv_compareNames() but also ignores
85 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
86 *
87 * @internal
88 */
89
90U_CAPI int32_t U_EXPORT2
91uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
92 int32_t rc, r1, r2;
93
94 for(;;) {
95 r1=getASCIIPropertyNameChar(name1);
96 r2=getASCIIPropertyNameChar(name2);
97
98 /* If we reach the ends of both strings then they match */
99 if(((r1|r2)&0xff)==0) {
100 return 0;
101 }
4388f060 102
374ca955
A
103 /* Compare the lowercased characters */
104 if(r1!=r2) {
105 rc=(r1&0xff)-(r2&0xff);
106 if(rc!=0) {
107 return rc;
108 }
109 }
110
111 name1+=r1>>8;
112 name2+=r2>>8;
113 }
114}
115
116U_CAPI int32_t U_EXPORT2
117uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
118 int32_t rc, r1, r2;
119
120 for(;;) {
121 r1=getEBCDICPropertyNameChar(name1);
122 r2=getEBCDICPropertyNameChar(name2);
123
124 /* If we reach the ends of both strings then they match */
125 if(((r1|r2)&0xff)==0) {
126 return 0;
127 }
4388f060 128
374ca955
A
129 /* Compare the lowercased characters */
130 if(r1!=r2) {
131 rc=(r1&0xff)-(r2&0xff);
132 if(rc!=0) {
133 return rc;
134 }
135 }
136
137 name1+=r1>>8;
138 name2+=r2>>8;
139 }
140}
141
142U_CDECL_END
b75a7d8f
A
143
144U_NAMESPACE_BEGIN
145
4388f060
A
146int32_t PropNameData::findProperty(int32_t property) {
147 int32_t i=1; // valueMaps index, initially after numRanges
148 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
149 // Read and skip the start and limit of this range.
150 int32_t start=valueMaps[i];
151 int32_t limit=valueMaps[i+1];
152 i+=2;
153 if(property<start) {
154 break;
155 }
156 if(property<limit) {
157 return i+(property-start)*2;
158 }
159 i+=(limit-start)*2; // Skip all entries for this range.
b75a7d8f 160 }
4388f060 161 return 0;
b75a7d8f
A
162}
163
4388f060
A
164int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
165 if(valueMapIndex==0) {
166 return 0; // The property does not have named values.
167 }
168 ++valueMapIndex; // Skip the BytesTrie offset.
169 int32_t numRanges=valueMaps[valueMapIndex++];
170 if(numRanges<0x10) {
171 // Ranges of values.
172 for(; numRanges>0; --numRanges) {
173 // Read and skip the start and limit of this range.
174 int32_t start=valueMaps[valueMapIndex];
175 int32_t limit=valueMaps[valueMapIndex+1];
176 valueMapIndex+=2;
177 if(value<start) {
178 break;
179 }
180 if(value<limit) {
181 return valueMaps[valueMapIndex+value-start];
182 }
183 valueMapIndex+=limit-start; // Skip all entries for this range.
184 }
185 } else {
186 // List of values.
187 int32_t valuesStart=valueMapIndex;
188 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
189 do {
190 int32_t v=valueMaps[valueMapIndex];
191 if(value<v) {
192 break;
193 }
194 if(value==v) {
195 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
196 }
197 } while(++valueMapIndex<nameGroupOffsetsStart);
198 }
199 return 0;
b75a7d8f
A
200}
201
4388f060
A
202const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
203 int32_t numNames=*nameGroup++;
204 if(nameIndex<0 || numNames<=nameIndex) {
205 return NULL;
206 }
207 // Skip nameIndex names.
208 for(; nameIndex>0; --nameIndex) {
209 nameGroup=uprv_strchr(nameGroup, 0)+1;
210 }
211 if(*nameGroup==0) {
212 return NULL; // no name (Property[Value]Aliases.txt has "n/a")
213 }
214 return nameGroup;
b75a7d8f
A
215}
216
4388f060
A
217UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
218 if(name==NULL) {
219 return FALSE;
220 }
221 UStringTrieResult result=USTRINGTRIE_NO_VALUE;
222 char c;
223 while((c=*name++)!=0) {
224 c=uprv_invCharToLowercaseAscii(c);
225 // Ignore delimiters '-', '_', and ASCII White_Space.
226 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
227 continue;
228 }
229 if(!USTRINGTRIE_HAS_NEXT(result)) {
230 return FALSE;
231 }
232 result=trie.next((uint8_t)c);
233 }
234 return USTRINGTRIE_HAS_VALUE(result);
b75a7d8f
A
235}
236
4388f060
A
237const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
238 int32_t valueMapIndex=findProperty(property);
239 if(valueMapIndex==0) {
240 return NULL; // Not a known property.
b75a7d8f 241 }
4388f060 242 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
b75a7d8f
A
243}
244
4388f060
A
245const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
246 int32_t valueMapIndex=findProperty(property);
247 if(valueMapIndex==0) {
248 return NULL; // Not a known property.
249 }
250 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
251 if(nameGroupOffset==0) {
252 return NULL;
253 }
254 return getName(nameGroups+nameGroupOffset, nameChoice);
b75a7d8f
A
255}
256
4388f060
A
257int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
258 BytesTrie trie(bytesTries+bytesTrieOffset);
259 if(containsName(trie, alias)) {
260 return trie.getValue();
261 } else {
262 return UCHAR_INVALID_CODE;
263 }
b75a7d8f
A
264}
265
4388f060
A
266int32_t PropNameData::getPropertyEnum(const char *alias) {
267 return getPropertyOrValueEnum(0, alias);
b75a7d8f 268}
b75a7d8f 269
4388f060
A
270int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
271 int32_t valueMapIndex=findProperty(property);
272 if(valueMapIndex==0) {
273 return UCHAR_INVALID_CODE; // Not a known property.
b75a7d8f 274 }
4388f060
A
275 valueMapIndex=valueMaps[valueMapIndex+1];
276 if(valueMapIndex==0) {
277 return UCHAR_INVALID_CODE; // The property does not have named values.
b75a7d8f 278 }
4388f060
A
279 // valueMapIndex is the start of the property's valueMap,
280 // where the first word is the BytesTrie offset.
281 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
b75a7d8f 282}
4388f060 283U_NAMESPACE_END
b75a7d8f
A
284
285//----------------------------------------------------------------------
286// Public API implementation
287
b75a7d8f
A
288U_CAPI const char* U_EXPORT2
289u_getPropertyName(UProperty property,
290 UPropertyNameChoice nameChoice) {
4388f060
A
291 U_NAMESPACE_USE
292 return PropNameData::getPropertyName(property, nameChoice);
b75a7d8f
A
293}
294
295U_CAPI UProperty U_EXPORT2
296u_getPropertyEnum(const char* alias) {
4388f060
A
297 U_NAMESPACE_USE
298 return (UProperty)PropNameData::getPropertyEnum(alias);
b75a7d8f
A
299}
300
301U_CAPI const char* U_EXPORT2
302u_getPropertyValueName(UProperty property,
303 int32_t value,
304 UPropertyNameChoice nameChoice) {
4388f060
A
305 U_NAMESPACE_USE
306 return PropNameData::getPropertyValueName(property, value, nameChoice);
b75a7d8f
A
307}
308
309U_CAPI int32_t U_EXPORT2
310u_getPropertyValueEnum(UProperty property,
311 const char* alias) {
4388f060
A
312 U_NAMESPACE_USE
313 return PropNameData::getPropertyValueEnum(property, alias);
374ca955
A
314}
315
4388f060
A
316U_CAPI const char* U_EXPORT2
317uscript_getName(UScriptCode scriptCode){
318 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
319 U_LONG_PROPERTY_NAME);
374ca955
A
320}
321
4388f060
A
322U_CAPI const char* U_EXPORT2
323uscript_getShortName(UScriptCode scriptCode){
324 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
325 U_SHORT_PROPERTY_NAME);
374ca955 326}