]>
Commit | Line | Data |
---|---|---|
51004dcb A |
1 | /* |
2 | ******************************************************************************* | |
2ca993e8 | 3 | * Copyright (C) 2013-2015, International Business Machines |
51004dcb A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * file name: uscript_props.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2013feb16 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | #include "unicode/unistr.h" | |
17 | #include "unicode/uscript.h" | |
18 | #include "unicode/utf16.h" | |
19 | #include "ustr_imp.h" | |
b331163b | 20 | #include "cmemory.h" |
51004dcb A |
21 | |
22 | namespace { | |
23 | ||
24 | // Script metadata (script properties). | |
25 | // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt | |
26 | ||
27 | // 0 = NOT_ENCODED, no sample character, default false script properties. | |
28 | // Bits 20.. 0: sample character | |
29 | ||
// Bits 23..21: usage
// Each constant is a small usage number (1..5) already shifted into bits 23..21,
// so it can be OR-ed directly with a sample character.
const int32_t UNKNOWN      = 0x200000;  // 1 << 21
const int32_t EXCLUSION    = 0x400000;  // 2 << 21
const int32_t LIMITED_USE  = 0x600000;  // 3 << 21
const int32_t ASPIRATIONAL = 0x800000;  // 4 << 21
const int32_t RECOMMENDED  = 0xA00000;  // 5 << 21

// Bits 31..24: Single-bit flags
const int32_t RTL        = 0x1000000;   // 1 << 24
const int32_t LB_LETTERS = 0x2000000;   // 1 << 25
const int32_t CASED      = 0x4000000;   // 1 << 26

41 | ||
// Packed script properties, one int32_t per script.
// getScriptProps() indexes this array directly with the UScriptCode value, so
// the entries are positional: never reorder, insert, or drop a line by hand.
// Each nonzero entry ORs together a sample code point (bits 20..0), one usage
// constant (bits 23..21) and any of the RTL / LB_LETTERS / CASED flag bits.
// A bare 0 entry is a script with no data (no sample character, all-false flags).
// The trailing comment on each entry is presumably the script's four-letter
// code -- confirm against the generator script named below.
42 | const int32_t SCRIPT_PROPS[] = { | |
43 | // Begin copy-paste output from | |
44 | // tools/trunk/unicode/py/parsescriptmetadata.py | |
b331163b | 45 | 0x0040 | RECOMMENDED, // Zyyy |
2ca993e8 | 46 | 0x0308 | RECOMMENDED, // Zinh |
51004dcb A |
47 | 0x0628 | RECOMMENDED | RTL, // Arab |
48 | 0x0531 | RECOMMENDED | CASED, // Armn | |
49 | 0x0995 | RECOMMENDED, // Beng | |
50 | 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo | |
2ca993e8 | 51 | 0x13C4 | LIMITED_USE | CASED, // Cher |
51004dcb A |
52 | 0x03E2 | EXCLUSION | CASED, // Copt |
53 | 0x042F | RECOMMENDED | CASED, // Cyrl | |
54 | 0x10414 | EXCLUSION | CASED, // Dsrt | |
55 | 0x0905 | RECOMMENDED, // Deva | |
56 | 0x12A0 | RECOMMENDED, // Ethi | |
57 | 0x10D3 | RECOMMENDED, // Geor | |
58 | 0x10330 | EXCLUSION, // Goth | |
59 | 0x03A9 | RECOMMENDED | CASED, // Grek | |
60 | 0x0A95 | RECOMMENDED, // Gujr | |
61 | 0x0A15 | RECOMMENDED, // Guru | |
62 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani | |
63 | 0xAC00 | RECOMMENDED, // Hang | |
64 | 0x05D0 | RECOMMENDED | RTL, // Hebr | |
65 | 0x304B | RECOMMENDED | LB_LETTERS, // Hira | |
66 | 0x0C95 | RECOMMENDED, // Knda | |
67 | 0x30AB | RECOMMENDED | LB_LETTERS, // Kana | |
68 | 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr | |
69 | 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo | |
70 | 0x004C | RECOMMENDED | CASED, // Latn | |
71 | 0x0D15 | RECOMMENDED, // Mlym | |
72 | 0x1826 | ASPIRATIONAL, // Mong | |
73 | 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr | |
74 | 0x168F | EXCLUSION, // Ogam | |
2ca993e8 | 75 | 0x10308 | EXCLUSION, // Ital |
51004dcb A |
76 | 0x0B15 | RECOMMENDED, // Orya |
77 | 0x16A0 | EXCLUSION, // Runr | |
78 | 0x0D85 | RECOMMENDED, // Sinh | |
79 | 0x0710 | LIMITED_USE | RTL, // Syrc | |
80 | 0x0B95 | RECOMMENDED, // Taml | |
81 | 0x0C15 | RECOMMENDED, // Telu | |
82 | 0x078C | RECOMMENDED | RTL, // Thaa | |
83 | 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai | |
84 | 0x0F40 | RECOMMENDED, // Tibt | |
85 | 0x14C0 | ASPIRATIONAL, // Cans | |
86 | 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii | |
87 | 0x1703 | EXCLUSION, // Tglg | |
88 | 0x1723 | EXCLUSION, // Hano | |
89 | 0x1743 | EXCLUSION, // Buhd | |
90 | 0x1763 | EXCLUSION, // Tagb | |
2ca993e8 | 91 | 0x280E | UNKNOWN, // Brai |
51004dcb A |
92 | 0x10800 | EXCLUSION | RTL, // Cprt |
93 | 0x1900 | LIMITED_USE, // Limb | |
94 | 0x10000 | EXCLUSION, // Linb | |
95 | 0x10480 | EXCLUSION, // Osma | |
96 | 0x10450 | EXCLUSION, // Shaw | |
97 | 0x1950 | LIMITED_USE | LB_LETTERS, // Tale | |
98 | 0x10380 | EXCLUSION, // Ugar | |
99 | 0, | |
100 | 0x1A00 | EXCLUSION, // Bugi | |
101 | 0x2C00 | EXCLUSION | CASED, // Glag | |
102 | 0x10A00 | EXCLUSION | RTL, // Khar | |
103 | 0xA800 | LIMITED_USE, // Sylo | |
104 | 0x1980 | LIMITED_USE | LB_LETTERS, // Talu | |
2ca993e8 | 105 | 0x2D5E | ASPIRATIONAL, // Tfng |
51004dcb | 106 | 0x103A0 | EXCLUSION, // Xpeo |
b331163b | 107 | 0x1B05 | LIMITED_USE, // Bali |
51004dcb A |
108 | 0x1BC0 | LIMITED_USE, // Batk |
109 | 0, | |
110 | 0x11005 | EXCLUSION, // Brah | |
111 | 0xAA00 | LIMITED_USE, // Cham | |
112 | 0, | |
113 | 0, | |
114 | 0, | |
115 | 0, | |
116 | 0x13153 | EXCLUSION, // Egyp | |
117 | 0, | |
118 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans | |
119 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant | |
b331163b | 120 | 0x16B1C | EXCLUSION, // Hmng |
2ca993e8 | 121 | 0x10CA1 | EXCLUSION | RTL | CASED, // Hung |
51004dcb | 122 | 0, |
b331163b | 123 | 0xA984 | LIMITED_USE, // Java |
51004dcb A |
124 | 0xA90A | LIMITED_USE, // Kali |
125 | 0, | |
126 | 0, | |
127 | 0x1C00 | LIMITED_USE, // Lepc | |
b331163b | 128 | 0x10647 | EXCLUSION, // Lina |
51004dcb A |
129 | 0x0840 | LIMITED_USE | RTL, // Mand |
130 | 0, | |
131 | 0x10980 | EXCLUSION | RTL, // Mero | |
2ca993e8 | 132 | 0x07D8 | LIMITED_USE | RTL, // Nkoo |
51004dcb | 133 | 0x10C00 | EXCLUSION | RTL, // Orkh |
b331163b | 134 | 0x1036B | EXCLUSION, // Perm |
51004dcb A |
135 | 0xA840 | EXCLUSION, // Phag |
136 | 0x10900 | EXCLUSION | RTL, // Phnx | |
137 | 0x16F00 | ASPIRATIONAL, // Plrd | |
138 | 0, | |
139 | 0, | |
140 | 0, | |
141 | 0, | |
142 | 0, | |
143 | 0, | |
144 | 0xA549 | LIMITED_USE, // Vaii | |
145 | 0, | |
146 | 0x12000 | EXCLUSION, // Xsux | |
147 | 0, | |
148 | 0xFDD0 | UNKNOWN, // Zzzz | |
2ca993e8 | 149 | 0x102B7 | EXCLUSION, // Cari |
51004dcb A |
150 | 0x304B | RECOMMENDED | LB_LETTERS, // Jpan |
151 | 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana | |
152 | 0x10280 | EXCLUSION, // Lyci | |
153 | 0x10920 | EXCLUSION | RTL, // Lydi | |
154 | 0x1C5A | LIMITED_USE, // Olck | |
155 | 0xA930 | EXCLUSION, // Rjng | |
156 | 0xA882 | LIMITED_USE, // Saur | |
2ca993e8 | 157 | 0x1D850 | EXCLUSION, // Sgnw |
51004dcb A |
158 | 0x1B83 | LIMITED_USE, // Sund |
159 | 0, | |
160 | 0xABC0 | LIMITED_USE, // Mtei | |
161 | 0x10840 | EXCLUSION | RTL, // Armi | |
162 | 0x10B00 | EXCLUSION | RTL, // Avst | |
163 | 0x11103 | LIMITED_USE, // Cakm | |
164 | 0xAC00 | RECOMMENDED, // Kore | |
165 | 0x11083 | EXCLUSION, // Kthi | |
2ca993e8 | 166 | 0x10AC1 | EXCLUSION | RTL, // Mani |
51004dcb | 167 | 0x10B60 | EXCLUSION | RTL, // Phli |
b331163b | 168 | 0x10B8F | EXCLUSION | RTL, // Phlp |
51004dcb A |
169 | 0, |
170 | 0x10B40 | EXCLUSION | RTL, // Prti | |
171 | 0x0800 | EXCLUSION | RTL, // Samr | |
172 | 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt | |
173 | 0, | |
174 | 0, | |
175 | 0xA6A0 | LIMITED_USE, // Bamu | |
2ca993e8 | 176 | 0xA4E8 | LIMITED_USE, // Lisu |
51004dcb A |
177 | 0, |
178 | 0x10A60 | EXCLUSION | RTL, // Sarb | |
b331163b A |
179 | 0x16AE6 | EXCLUSION, // Bass |
180 | 0x1BC20 | EXCLUSION, // Dupl | |
181 | 0x10500 | EXCLUSION, // Elba | |
182 | 0x11315 | EXCLUSION, // Gran | |
51004dcb A |
183 | 0, |
184 | 0, | |
b331163b | 185 | 0x1E802 | EXCLUSION | RTL, // Mend |
51004dcb | 186 | 0x109A0 | EXCLUSION | RTL, // Merc |
b331163b A |
187 | 0x10A95 | EXCLUSION | RTL, // Narb |
188 | 0x10896 | EXCLUSION | RTL, // Nbat | |
189 | 0x10873 | EXCLUSION | RTL, // Palm | |
190 | 0x112BE | EXCLUSION, // Sind | |
191 | 0x118B4 | EXCLUSION | CASED, // Wara | |
51004dcb A |
192 | 0, |
193 | 0, | |
b331163b | 194 | 0x16A4F | EXCLUSION, // Mroo |
51004dcb A |
195 | 0, |
196 | 0x11183 | EXCLUSION, // Shrd | |
197 | 0x110D0 | EXCLUSION, // Sora | |
198 | 0x11680 | EXCLUSION, // Takr | |
199 | 0, | |
200 | 0, | |
2ca993e8 | 201 | 0x14400 | EXCLUSION, // Hluw |
b331163b A |
202 | 0x11208 | EXCLUSION, // Khoj |
203 | 0x11484 | EXCLUSION, // Tirh | |
204 | 0x10537 | EXCLUSION, // Aghb | |
205 | 0x11152 | EXCLUSION, // Mahj | |
2ca993e8 A |
206 | 0x11717 | EXCLUSION | LB_LETTERS, // Ahom |
207 | 0x108F4 | EXCLUSION | RTL, // Hatr | |
b331163b | 208 | 0x1160E | EXCLUSION, // Modi |
2ca993e8 | 209 | 0x1128F | EXCLUSION, // Mult |
b331163b A |
210 | 0x11AC0 | EXCLUSION, // Pauc |
211 | 0x1158E | EXCLUSION, // Sidd | |
51004dcb A |
212 | // End copy-paste from parsescriptmetadata.py |
213 | }; | |
214 | ||
215 | int32_t getScriptProps(UScriptCode script) { | |
b331163b | 216 | if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { |
51004dcb A |
217 | return SCRIPT_PROPS[script]; |
218 | } else { | |
219 | return 0; | |
220 | } | |
221 | } | |
222 | ||
223 | } // namespace | |
224 | ||
225 | U_CAPI int32_t U_EXPORT2 | |
226 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { | |
227 | if(U_FAILURE(*pErrorCode)) { return 0; } | |
228 | if(capacity < 0 || (capacity > 0 && dest == NULL)) { | |
229 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
230 | return 0; | |
231 | } | |
232 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; | |
233 | int32_t length; | |
234 | if(sampleChar == 0) { | |
235 | length = 0; | |
236 | } else { | |
237 | length = U16_LENGTH(sampleChar); | |
238 | if(length <= capacity) { | |
239 | int32_t i = 0; | |
240 | U16_APPEND_UNSAFE(dest, i, sampleChar); | |
241 | } | |
242 | } | |
243 | return u_terminateUChars(dest, capacity, length, pErrorCode); | |
244 | } | |
245 | ||
246 | U_COMMON_API icu::UnicodeString U_EXPORT2 | |
247 | uscript_getSampleUnicodeString(UScriptCode script) { | |
248 | icu::UnicodeString sample; | |
249 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; | |
250 | if(sampleChar != 0) { | |
251 | sample.append(sampleChar); | |
252 | } | |
253 | return sample; | |
254 | } | |
255 | ||
256 | U_CAPI UScriptUsage U_EXPORT2 | |
257 | uscript_getUsage(UScriptCode script) { | |
258 | return (UScriptUsage)((getScriptProps(script) >> 21) & 7); | |
259 | } | |
260 | ||
261 | U_CAPI UBool U_EXPORT2 | |
262 | uscript_isRightToLeft(UScriptCode script) { | |
263 | return (getScriptProps(script) & RTL) != 0; | |
264 | } | |
265 | ||
266 | U_CAPI UBool U_EXPORT2 | |
267 | uscript_breaksBetweenLetters(UScriptCode script) { | |
268 | return (getScriptProps(script) & LB_LETTERS) != 0; | |
269 | } | |
270 | ||
271 | U_CAPI UBool U_EXPORT2 | |
272 | uscript_isCased(UScriptCode script) { | |
273 | return (getScriptProps(script) & CASED) != 0; | |
274 | } |