]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uscript_props.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / common / uscript_props.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: uscript_props.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2013feb16
12 * created by: Markus W. Scherer
13 */
14
15 #include "unicode/utypes.h"
16 #include "unicode/unistr.h"
17 #include "unicode/uscript.h"
18 #include "unicode/utf16.h"
19 #include "ustr_imp.h"
20
// Number of elements in a true array (not a pointer), as a signed int32_t so
// it can be compared against signed indexes without sign-conversion warnings.
// The whole expansion is parenthesized so that the macro behaves as a single
// primary expression regardless of the operators surrounding its use.
#define LENGTHOF(array) (static_cast<int32_t>(sizeof(array)/sizeof((array)[0])))
22
23 namespace {
24
25 // Script metadata (script properties).
26 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
27
28 // 0 = NOT_ENCODED, no sample character, default false script properties.
29 // Bits 20.. 0: sample character
30
// Bits 23..21: usage.
// Each constant is a small ordinal pre-shifted into bits 23..21 so it can be
// OR-ed directly into a SCRIPT_PROPS entry; uscript_getUsage() recovers the
// ordinal with (props >> 21) & 7.
const int32_t UNKNOWN      = 0x00200000;  // 1 << 21
const int32_t EXCLUSION    = 0x00400000;  // 2 << 21
const int32_t LIMITED_USE  = 0x00600000;  // 3 << 21
const int32_t ASPIRATIONAL = 0x00800000;  // 4 << 21
const int32_t RECOMMENDED  = 0x00A00000;  // 5 << 21
37
// Bits 31..24: Single-bit flags.
// Each flag occupies its own bit above the usage field, so flags can be
// combined with | and tested with & independently of one another.
const int32_t RTL        = 0x01000000;  // bit 24
const int32_t LB_LETTERS = 0x02000000;  // bit 25
const int32_t CASED      = 0x04000000;  // bit 26
42
// Packed per-script property words. getScriptProps() indexes this array
// directly with the numeric script code, so the ORDER of the entries is
// significant and must not be changed by hand.
//
// Layout of each nonzero entry:
//   - low bits: the script's sample character (a code point),
//   - exactly one usage constant (UNKNOWN .. RECOMMENDED) OR-ed in,
//   - zero or more of the single-bit flags RTL, LB_LETTERS, CASED.
// A bare 0 entry carries no sample character, and every flag reads as false.
//
// The body between the Begin/End markers is pasted tool output; to change it,
// presumably the generator script named below should be re-run rather than
// editing individual rows (NOTE(review): confirm the regeneration workflow).
43 const int32_t SCRIPT_PROPS[] = {
44 // Begin copy-paste output from
45 // tools/trunk/unicode/py/parsescriptmetadata.py
46 0x0040 | UNKNOWN, // Zyyy
47 0x0308 | UNKNOWN, // Zinh
48 0x0628 | RECOMMENDED | RTL, // Arab
49 0x0531 | RECOMMENDED | CASED, // Armn
50 0x0995 | RECOMMENDED, // Beng
51 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
52 0x13C4 | LIMITED_USE, // Cher
53 0x03E2 | EXCLUSION | CASED, // Copt
54 0x042F | RECOMMENDED | CASED, // Cyrl
55 0x10414 | EXCLUSION | CASED, // Dsrt
56 0x0905 | RECOMMENDED, // Deva
57 0x12A0 | RECOMMENDED, // Ethi
58 0x10D3 | RECOMMENDED, // Geor
59 0x10330 | EXCLUSION, // Goth
60 0x03A9 | RECOMMENDED | CASED, // Grek
61 0x0A95 | RECOMMENDED, // Gujr
62 0x0A15 | RECOMMENDED, // Guru
63 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
64 0xAC00 | RECOMMENDED, // Hang
65 0x05D0 | RECOMMENDED | RTL, // Hebr
66 0x304B | RECOMMENDED | LB_LETTERS, // Hira
67 0x0C95 | RECOMMENDED, // Knda
68 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
69 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
70 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
71 0x004C | RECOMMENDED | CASED, // Latn
72 0x0D15 | RECOMMENDED, // Mlym
73 0x1826 | ASPIRATIONAL, // Mong
74 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
75 0x168F | EXCLUSION, // Ogam
76 0x10300 | EXCLUSION, // Ital
77 0x0B15 | RECOMMENDED, // Orya
78 0x16A0 | EXCLUSION, // Runr
79 0x0D85 | RECOMMENDED, // Sinh
80 0x0710 | LIMITED_USE | RTL, // Syrc
81 0x0B95 | RECOMMENDED, // Taml
82 0x0C15 | RECOMMENDED, // Telu
83 0x078C | RECOMMENDED | RTL, // Thaa
84 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
85 0x0F40 | RECOMMENDED, // Tibt
86 0x14C0 | ASPIRATIONAL, // Cans
87 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii
88 0x1703 | EXCLUSION, // Tglg
89 0x1723 | EXCLUSION, // Hano
90 0x1743 | EXCLUSION, // Buhd
91 0x1763 | EXCLUSION, // Tagb
92 0x2800 | UNKNOWN, // Brai
93 0x10800 | EXCLUSION | RTL, // Cprt
94 0x1900 | LIMITED_USE, // Limb
95 0x10000 | EXCLUSION, // Linb
96 0x10480 | EXCLUSION, // Osma
97 0x10450 | EXCLUSION, // Shaw
98 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
99 0x10380 | EXCLUSION, // Ugar
100 0,
101 0x1A00 | EXCLUSION, // Bugi
102 0x2C00 | EXCLUSION | CASED, // Glag
103 0x10A00 | EXCLUSION | RTL, // Khar
104 0xA800 | LIMITED_USE, // Sylo
105 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
106 0x2D30 | ASPIRATIONAL, // Tfng
107 0x103A0 | EXCLUSION, // Xpeo
108 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali
109 0x1BC0 | LIMITED_USE, // Batk
110 0,
111 0x11005 | EXCLUSION, // Brah
112 0xAA00 | LIMITED_USE, // Cham
113 0,
114 0,
115 0,
116 0,
117 0x13153 | EXCLUSION, // Egyp
118 0,
119 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
120 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
121 0,
122 0,
123 0,
124 0xA984 | LIMITED_USE | LB_LETTERS, // Java
125 0xA90A | LIMITED_USE, // Kali
126 0,
127 0,
128 0x1C00 | LIMITED_USE, // Lepc
129 0,
130 0x0840 | LIMITED_USE | RTL, // Mand
131 0,
132 0x10980 | EXCLUSION | RTL, // Mero
133 0x07CA | LIMITED_USE | RTL, // Nkoo
134 0x10C00 | EXCLUSION | RTL, // Orkh
135 0,
136 0xA840 | EXCLUSION, // Phag
137 0x10900 | EXCLUSION | RTL, // Phnx
138 0x16F00 | ASPIRATIONAL, // Plrd
139 0,
140 0,
141 0,
142 0,
143 0,
144 0,
145 0xA549 | LIMITED_USE, // Vaii
146 0,
147 0x12000 | EXCLUSION, // Xsux
148 0,
149 0xFDD0 | UNKNOWN, // Zzzz
150 0x102A0 | EXCLUSION, // Cari
151 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
152 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
153 0x10280 | EXCLUSION, // Lyci
154 0x10920 | EXCLUSION | RTL, // Lydi
155 0x1C5A | LIMITED_USE, // Olck
156 0xA930 | EXCLUSION, // Rjng
157 0xA882 | LIMITED_USE, // Saur
158 0,
159 0x1B83 | LIMITED_USE, // Sund
160 0,
161 0xABC0 | LIMITED_USE, // Mtei
162 0x10840 | EXCLUSION | RTL, // Armi
163 0x10B00 | EXCLUSION | RTL, // Avst
164 0x11103 | LIMITED_USE, // Cakm
165 0xAC00 | RECOMMENDED, // Kore
166 0x11083 | EXCLUSION, // Kthi
167 0,
168 0x10B60 | EXCLUSION | RTL, // Phli
169 0,
170 0,
171 0x10B40 | EXCLUSION | RTL, // Prti
172 0x0800 | EXCLUSION | RTL, // Samr
173 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
174 0,
175 0,
176 0xA6A0 | LIMITED_USE, // Bamu
177 0xA4D0 | LIMITED_USE, // Lisu
178 0,
179 0x10A60 | EXCLUSION | RTL, // Sarb
180 0,
181 0,
182 0,
183 0,
184 0,
185 0,
186 0,
187 0x109A0 | EXCLUSION | RTL, // Merc
188 0,
189 0,
190 0,
191 0,
192 0,
193 0,
194 0,
195 0,
196 0,
197 0x11183 | EXCLUSION, // Shrd
198 0x110D0 | EXCLUSION, // Sora
199 0x11680 | EXCLUSION, // Takr
200 0,
201 0,
202 0,
203 0,
204 0,
205 // End copy-paste from parsescriptmetadata.py
206 };
207
208 int32_t getScriptProps(UScriptCode script) {
209 if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) {
210 return SCRIPT_PROPS[script];
211 } else {
212 return 0;
213 }
214 }
215
216 } // namespace
217
218 U_CAPI int32_t U_EXPORT2
219 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
220 if(U_FAILURE(*pErrorCode)) { return 0; }
221 if(capacity < 0 || (capacity > 0 && dest == NULL)) {
222 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
223 return 0;
224 }
225 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
226 int32_t length;
227 if(sampleChar == 0) {
228 length = 0;
229 } else {
230 length = U16_LENGTH(sampleChar);
231 if(length <= capacity) {
232 int32_t i = 0;
233 U16_APPEND_UNSAFE(dest, i, sampleChar);
234 }
235 }
236 return u_terminateUChars(dest, capacity, length, pErrorCode);
237 }
238
239 U_COMMON_API icu::UnicodeString U_EXPORT2
240 uscript_getSampleUnicodeString(UScriptCode script) {
241 icu::UnicodeString sample;
242 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
243 if(sampleChar != 0) {
244 sample.append(sampleChar);
245 }
246 return sample;
247 }
248
249 U_CAPI UScriptUsage U_EXPORT2
250 uscript_getUsage(UScriptCode script) {
251 return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
252 }
253
254 U_CAPI UBool U_EXPORT2
255 uscript_isRightToLeft(UScriptCode script) {
256 return (getScriptProps(script) & RTL) != 0;
257 }
258
259 U_CAPI UBool U_EXPORT2
260 uscript_breaksBetweenLetters(UScriptCode script) {
261 return (getScriptProps(script) & LB_LETTERS) != 0;
262 }
263
264 U_CAPI UBool U_EXPORT2
265 uscript_isCased(UScriptCode script) {
266 return (getScriptProps(script) & CASED) != 0;
267 }