]>
Commit | Line | Data |
---|---|---|
51004dcb A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2013, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * file name: uscript_props.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2013feb16 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | #include "unicode/unistr.h" | |
17 | #include "unicode/uscript.h" | |
18 | #include "unicode/utf16.h" | |
19 | #include "ustr_imp.h" | |
20 | ||
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so the macro behaves as a single
// primary expression wherever it is used (for example as a sizeof operand).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
22 | ||
23 | namespace { | |
24 | ||
// Script metadata (script properties), packed into one int32_t per script.
// Source data:
// http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
//
// Layout of a packed value:
//   Bits 20.. 0: sample character (code point)
//   Bits 23..21: usage
//   Bits 31..24: single-bit flags
// A value of 0 means NOT_ENCODED: no sample character and
// default (false) script properties.

// Usage field, bits 23..21.
// uscript_getUsage() reads this field back with (props >> 21) & 7.
const int32_t USAGE_SHIFT = 21;
const int32_t UNKNOWN = 1 << USAGE_SHIFT;
const int32_t EXCLUSION = 2 << USAGE_SHIFT;
const int32_t LIMITED_USE = 3 << USAGE_SHIFT;
const int32_t ASPIRATIONAL = 4 << USAGE_SHIFT;
const int32_t RECOMMENDED = 5 << USAGE_SHIFT;

// Single-bit flags, starting at bit 24.
const int32_t RTL = 0x01000000;         // bit 24
const int32_t LB_LETTERS = 0x02000000;  // bit 25
const int32_t CASED = 0x04000000;       // bit 26
42 | ||
43 | const int32_t SCRIPT_PROPS[] = { | |
44 | // Begin copy-paste output from | |
45 | // tools/trunk/unicode/py/parsescriptmetadata.py | |
46 | 0x0040 | UNKNOWN, // Zyyy | |
47 | 0x0308 | UNKNOWN, // Zinh | |
48 | 0x0628 | RECOMMENDED | RTL, // Arab | |
49 | 0x0531 | RECOMMENDED | CASED, // Armn | |
50 | 0x0995 | RECOMMENDED, // Beng | |
51 | 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo | |
52 | 0x13C4 | LIMITED_USE, // Cher | |
53 | 0x03E2 | EXCLUSION | CASED, // Copt | |
54 | 0x042F | RECOMMENDED | CASED, // Cyrl | |
55 | 0x10414 | EXCLUSION | CASED, // Dsrt | |
56 | 0x0905 | RECOMMENDED, // Deva | |
57 | 0x12A0 | RECOMMENDED, // Ethi | |
58 | 0x10D3 | RECOMMENDED, // Geor | |
59 | 0x10330 | EXCLUSION, // Goth | |
60 | 0x03A9 | RECOMMENDED | CASED, // Grek | |
61 | 0x0A95 | RECOMMENDED, // Gujr | |
62 | 0x0A15 | RECOMMENDED, // Guru | |
63 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani | |
64 | 0xAC00 | RECOMMENDED, // Hang | |
65 | 0x05D0 | RECOMMENDED | RTL, // Hebr | |
66 | 0x304B | RECOMMENDED | LB_LETTERS, // Hira | |
67 | 0x0C95 | RECOMMENDED, // Knda | |
68 | 0x30AB | RECOMMENDED | LB_LETTERS, // Kana | |
69 | 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr | |
70 | 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo | |
71 | 0x004C | RECOMMENDED | CASED, // Latn | |
72 | 0x0D15 | RECOMMENDED, // Mlym | |
73 | 0x1826 | ASPIRATIONAL, // Mong | |
74 | 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr | |
75 | 0x168F | EXCLUSION, // Ogam | |
76 | 0x10300 | EXCLUSION, // Ital | |
77 | 0x0B15 | RECOMMENDED, // Orya | |
78 | 0x16A0 | EXCLUSION, // Runr | |
79 | 0x0D85 | RECOMMENDED, // Sinh | |
80 | 0x0710 | LIMITED_USE | RTL, // Syrc | |
81 | 0x0B95 | RECOMMENDED, // Taml | |
82 | 0x0C15 | RECOMMENDED, // Telu | |
83 | 0x078C | RECOMMENDED | RTL, // Thaa | |
84 | 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai | |
85 | 0x0F40 | RECOMMENDED, // Tibt | |
86 | 0x14C0 | ASPIRATIONAL, // Cans | |
87 | 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii | |
88 | 0x1703 | EXCLUSION, // Tglg | |
89 | 0x1723 | EXCLUSION, // Hano | |
90 | 0x1743 | EXCLUSION, // Buhd | |
91 | 0x1763 | EXCLUSION, // Tagb | |
92 | 0x2800 | UNKNOWN, // Brai | |
93 | 0x10800 | EXCLUSION | RTL, // Cprt | |
94 | 0x1900 | LIMITED_USE, // Limb | |
95 | 0x10000 | EXCLUSION, // Linb | |
96 | 0x10480 | EXCLUSION, // Osma | |
97 | 0x10450 | EXCLUSION, // Shaw | |
98 | 0x1950 | LIMITED_USE | LB_LETTERS, // Tale | |
99 | 0x10380 | EXCLUSION, // Ugar | |
100 | 0, | |
101 | 0x1A00 | EXCLUSION, // Bugi | |
102 | 0x2C00 | EXCLUSION | CASED, // Glag | |
103 | 0x10A00 | EXCLUSION | RTL, // Khar | |
104 | 0xA800 | LIMITED_USE, // Sylo | |
105 | 0x1980 | LIMITED_USE | LB_LETTERS, // Talu | |
106 | 0x2D30 | ASPIRATIONAL, // Tfng | |
107 | 0x103A0 | EXCLUSION, // Xpeo | |
108 | 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali | |
109 | 0x1BC0 | LIMITED_USE, // Batk | |
110 | 0, | |
111 | 0x11005 | EXCLUSION, // Brah | |
112 | 0xAA00 | LIMITED_USE, // Cham | |
113 | 0, | |
114 | 0, | |
115 | 0, | |
116 | 0, | |
117 | 0x13153 | EXCLUSION, // Egyp | |
118 | 0, | |
119 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans | |
120 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant | |
121 | 0, | |
122 | 0, | |
123 | 0, | |
124 | 0xA984 | LIMITED_USE | LB_LETTERS, // Java | |
125 | 0xA90A | LIMITED_USE, // Kali | |
126 | 0, | |
127 | 0, | |
128 | 0x1C00 | LIMITED_USE, // Lepc | |
129 | 0, | |
130 | 0x0840 | LIMITED_USE | RTL, // Mand | |
131 | 0, | |
132 | 0x10980 | EXCLUSION | RTL, // Mero | |
133 | 0x07CA | LIMITED_USE | RTL, // Nkoo | |
134 | 0x10C00 | EXCLUSION | RTL, // Orkh | |
135 | 0, | |
136 | 0xA840 | EXCLUSION, // Phag | |
137 | 0x10900 | EXCLUSION | RTL, // Phnx | |
138 | 0x16F00 | ASPIRATIONAL, // Plrd | |
139 | 0, | |
140 | 0, | |
141 | 0, | |
142 | 0, | |
143 | 0, | |
144 | 0, | |
145 | 0xA549 | LIMITED_USE, // Vaii | |
146 | 0, | |
147 | 0x12000 | EXCLUSION, // Xsux | |
148 | 0, | |
149 | 0xFDD0 | UNKNOWN, // Zzzz | |
150 | 0x102A0 | EXCLUSION, // Cari | |
151 | 0x304B | RECOMMENDED | LB_LETTERS, // Jpan | |
152 | 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana | |
153 | 0x10280 | EXCLUSION, // Lyci | |
154 | 0x10920 | EXCLUSION | RTL, // Lydi | |
155 | 0x1C5A | LIMITED_USE, // Olck | |
156 | 0xA930 | EXCLUSION, // Rjng | |
157 | 0xA882 | LIMITED_USE, // Saur | |
158 | 0, | |
159 | 0x1B83 | LIMITED_USE, // Sund | |
160 | 0, | |
161 | 0xABC0 | LIMITED_USE, // Mtei | |
162 | 0x10840 | EXCLUSION | RTL, // Armi | |
163 | 0x10B00 | EXCLUSION | RTL, // Avst | |
164 | 0x11103 | LIMITED_USE, // Cakm | |
165 | 0xAC00 | RECOMMENDED, // Kore | |
166 | 0x11083 | EXCLUSION, // Kthi | |
167 | 0, | |
168 | 0x10B60 | EXCLUSION | RTL, // Phli | |
169 | 0, | |
170 | 0, | |
171 | 0x10B40 | EXCLUSION | RTL, // Prti | |
172 | 0x0800 | EXCLUSION | RTL, // Samr | |
173 | 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt | |
174 | 0, | |
175 | 0, | |
176 | 0xA6A0 | LIMITED_USE, // Bamu | |
177 | 0xA4D0 | LIMITED_USE, // Lisu | |
178 | 0, | |
179 | 0x10A60 | EXCLUSION | RTL, // Sarb | |
180 | 0, | |
181 | 0, | |
182 | 0, | |
183 | 0, | |
184 | 0, | |
185 | 0, | |
186 | 0, | |
187 | 0x109A0 | EXCLUSION | RTL, // Merc | |
188 | 0, | |
189 | 0, | |
190 | 0, | |
191 | 0, | |
192 | 0, | |
193 | 0, | |
194 | 0, | |
195 | 0, | |
196 | 0, | |
197 | 0x11183 | EXCLUSION, // Shrd | |
198 | 0x110D0 | EXCLUSION, // Sora | |
199 | 0x11680 | EXCLUSION, // Takr | |
200 | 0, | |
201 | 0, | |
202 | 0, | |
203 | 0, | |
204 | 0, | |
57a6839d A |
205 | 0, |
206 | 0, | |
51004dcb A |
207 | // End copy-paste from parsescriptmetadata.py |
208 | }; | |
209 | ||
210 | int32_t getScriptProps(UScriptCode script) { | |
211 | if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) { | |
212 | return SCRIPT_PROPS[script]; | |
213 | } else { | |
214 | return 0; | |
215 | } | |
216 | } | |
217 | ||
218 | } // namespace | |
219 | ||
220 | U_CAPI int32_t U_EXPORT2 | |
221 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { | |
222 | if(U_FAILURE(*pErrorCode)) { return 0; } | |
223 | if(capacity < 0 || (capacity > 0 && dest == NULL)) { | |
224 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
225 | return 0; | |
226 | } | |
227 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; | |
228 | int32_t length; | |
229 | if(sampleChar == 0) { | |
230 | length = 0; | |
231 | } else { | |
232 | length = U16_LENGTH(sampleChar); | |
233 | if(length <= capacity) { | |
234 | int32_t i = 0; | |
235 | U16_APPEND_UNSAFE(dest, i, sampleChar); | |
236 | } | |
237 | } | |
238 | return u_terminateUChars(dest, capacity, length, pErrorCode); | |
239 | } | |
240 | ||
241 | U_COMMON_API icu::UnicodeString U_EXPORT2 | |
242 | uscript_getSampleUnicodeString(UScriptCode script) { | |
243 | icu::UnicodeString sample; | |
244 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; | |
245 | if(sampleChar != 0) { | |
246 | sample.append(sampleChar); | |
247 | } | |
248 | return sample; | |
249 | } | |
250 | ||
251 | U_CAPI UScriptUsage U_EXPORT2 | |
252 | uscript_getUsage(UScriptCode script) { | |
253 | return (UScriptUsage)((getScriptProps(script) >> 21) & 7); | |
254 | } | |
255 | ||
256 | U_CAPI UBool U_EXPORT2 | |
257 | uscript_isRightToLeft(UScriptCode script) { | |
258 | return (getScriptProps(script) & RTL) != 0; | |
259 | } | |
260 | ||
261 | U_CAPI UBool U_EXPORT2 | |
262 | uscript_breaksBetweenLetters(UScriptCode script) { | |
263 | return (getScriptProps(script) & LB_LETTERS) != 0; | |
264 | } | |
265 | ||
266 | U_CAPI UBool U_EXPORT2 | |
267 | uscript_isCased(UScriptCode script) { | |
268 | return (getScriptProps(script) & CASED) != 0; | |
269 | } |