// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uscript_props.cpp
* tab size: 8 (not used)
* created on: 2013feb16
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uscript.h"
#include "unicode/utf16.h"
// Script metadata (script properties).
// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
//
// Each script is described by one packed int32_t value:
// 0 = NOT_ENCODED, no sample character, default false script properties.
// Bits 20.. 0: sample character

// Bits 23..21: usage (a 3-bit value, not independent flags)
const int32_t UNKNOWN = 1 << 21;
const int32_t EXCLUSION = 2 << 21;
const int32_t LIMITED_USE = 3 << 21;
// const int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
const int32_t RECOMMENDED = 5 << 21;

// Bits 31..24: Single-bit flags
const int32_t RTL = 1 << 24;
const int32_t LB_LETTERS = 1 << 25;
const int32_t CASED = 1 << 26;

// One entry per script code; the array is indexed directly by the numeric
// script code, so the position of every row matters. A bare 0 is the
// NOT_ENCODED placeholder for a script code that has no metadata; such rows
// keep the following entries aligned and must not be removed.
// NOTE(review): the bare-0 placeholder rows should be re-checked against fresh
// parsescriptmetadata.py output the next time this table is regenerated.
const int32_t SCRIPT_PROPS[] = {
    // Begin copy-paste output from
    // tools/trunk/unicode/py/parsescriptmetadata.py
    0x0040 | RECOMMENDED,  // Zyyy
    0x0308 | RECOMMENDED,  // Zinh
    0x0628 | RECOMMENDED | RTL,  // Arab
    0x0531 | RECOMMENDED | CASED,  // Armn
    0x0995 | RECOMMENDED,  // Beng
    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
    0x13C4 | LIMITED_USE | CASED,  // Cher
    0x03E2 | EXCLUSION | CASED,  // Copt
    0x042F | RECOMMENDED | CASED,  // Cyrl
    0x10414 | EXCLUSION | CASED,  // Dsrt
    0x0905 | RECOMMENDED,  // Deva
    0x12A0 | RECOMMENDED,  // Ethi
    0x10D3 | RECOMMENDED,  // Geor
    0x10330 | EXCLUSION,  // Goth
    0x03A9 | RECOMMENDED | CASED,  // Grek
    0x0A95 | RECOMMENDED,  // Gujr
    0x0A15 | RECOMMENDED,  // Guru
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
    0xAC00 | RECOMMENDED,  // Hang
    0x05D0 | RECOMMENDED | RTL,  // Hebr
    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
    0x0C95 | RECOMMENDED,  // Knda
    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
    0x004C | RECOMMENDED | CASED,  // Latn
    0x0D15 | RECOMMENDED,  // Mlym
    0x1826 | EXCLUSION,  // Mong
    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
    0x168F | EXCLUSION,  // Ogam
    0x10300 | EXCLUSION,  // Ital
    0x0B15 | RECOMMENDED,  // Orya
    0x16A0 | EXCLUSION,  // Runr
    0x0D85 | RECOMMENDED,  // Sinh
    0x0710 | LIMITED_USE | RTL,  // Syrc
    0x0B95 | RECOMMENDED,  // Taml
    0x0C15 | RECOMMENDED,  // Telu
    0x078C | RECOMMENDED | RTL,  // Thaa
    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
    0x0F40 | RECOMMENDED,  // Tibt
    0x14C0 | LIMITED_USE,  // Cans
    0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
    0x1703 | EXCLUSION,  // Tglg
    0x1723 | EXCLUSION,  // Hano
    0x1743 | EXCLUSION,  // Buhd
    0x1763 | EXCLUSION,  // Tagb
    0x280E | UNKNOWN,  // Brai
    0x10800 | EXCLUSION | RTL,  // Cprt
    0x1900 | LIMITED_USE,  // Limb
    0x10000 | EXCLUSION,  // Linb
    0x10480 | EXCLUSION,  // Osma
    0x10450 | EXCLUSION,  // Shaw
    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
    0x10380 | EXCLUSION,  // Ugar
    0,
    0x1A00 | EXCLUSION,  // Bugi
    0x2C00 | EXCLUSION | CASED,  // Glag
    0x10A00 | EXCLUSION | RTL,  // Khar
    0xA800 | LIMITED_USE,  // Sylo
    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
    0x2D30 | LIMITED_USE,  // Tfng
    0x103A0 | EXCLUSION,  // Xpeo
    0x1B05 | LIMITED_USE,  // Bali
    0x1BC0 | LIMITED_USE,  // Batk
    0,
    0x11005 | EXCLUSION,  // Brah
    0xAA00 | LIMITED_USE,  // Cham
    0,
    0,
    0,
    0,
    0x13153 | EXCLUSION,  // Egyp
    0,
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
    0x16B1C | EXCLUSION,  // Hmng
    0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
    0,
    0xA984 | LIMITED_USE,  // Java
    0xA90A | LIMITED_USE,  // Kali
    0,
    0,
    0x1C00 | LIMITED_USE,  // Lepc
    0x10647 | EXCLUSION,  // Lina
    0x0840 | LIMITED_USE | RTL,  // Mand
    0,
    0x10980 | EXCLUSION | RTL,  // Mero
    0x07CA | LIMITED_USE | RTL,  // Nkoo
    0x10C00 | EXCLUSION | RTL,  // Orkh
    0x1036B | EXCLUSION,  // Perm
    0xA840 | EXCLUSION,  // Phag
    0x10900 | EXCLUSION | RTL,  // Phnx
    0x16F00 | LIMITED_USE,  // Plrd
    0,
    0,
    0,
    0,
    0,
    0,
    0xA549 | LIMITED_USE,  // Vaii
    0,
    0x12000 | EXCLUSION,  // Xsux
    0,
    0xFDD0 | UNKNOWN,  // Zzzz
    0x102A0 | EXCLUSION,  // Cari
    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
    0x10280 | EXCLUSION,  // Lyci
    0x10920 | EXCLUSION | RTL,  // Lydi
    0x1C5A | LIMITED_USE,  // Olck
    0xA930 | EXCLUSION,  // Rjng
    0xA882 | LIMITED_USE,  // Saur
    0x1D850 | EXCLUSION,  // Sgnw
    0x1B83 | LIMITED_USE,  // Sund
    0,
    0xABC0 | LIMITED_USE,  // Mtei
    0x10840 | EXCLUSION | RTL,  // Armi
    0x10B00 | EXCLUSION | RTL,  // Avst
    0x11103 | LIMITED_USE,  // Cakm
    0xAC00 | RECOMMENDED,  // Kore
    0x11083 | EXCLUSION,  // Kthi
    0x10AD8 | EXCLUSION | RTL,  // Mani
    0x10B60 | EXCLUSION | RTL,  // Phli
    0x10B8F | EXCLUSION | RTL,  // Phlp
    0,
    0x10B40 | EXCLUSION | RTL,  // Prti
    0x0800 | EXCLUSION | RTL,  // Samr
    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
    0,
    0,
    0xA6A0 | LIMITED_USE,  // Bamu
    0xA4D0 | LIMITED_USE,  // Lisu
    0,
    0x10A60 | EXCLUSION | RTL,  // Sarb
    0x16AE6 | EXCLUSION,  // Bass
    0x1BC20 | EXCLUSION,  // Dupl
    0x10500 | EXCLUSION,  // Elba
    0x11315 | EXCLUSION,  // Gran
    0,
    0,
    0x1E802 | EXCLUSION | RTL,  // Mend
    0x109A0 | EXCLUSION | RTL,  // Merc
    0x10A95 | EXCLUSION | RTL,  // Narb
    0x10896 | EXCLUSION | RTL,  // Nbat
    0x10873 | EXCLUSION | RTL,  // Palm
    0x112BE | EXCLUSION,  // Sind
    0x118B4 | EXCLUSION | CASED,  // Wara
    0,
    0,
    0x16A4F | EXCLUSION,  // Mroo
    0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
    0x11183 | EXCLUSION,  // Shrd
    0x110D0 | EXCLUSION,  // Sora
    0x11680 | EXCLUSION,  // Takr
    0x18229 | EXCLUSION | LB_LETTERS,  // Tang
    0,
    0x14400 | EXCLUSION,  // Hluw
    0x11208 | EXCLUSION,  // Khoj
    0x11484 | EXCLUSION,  // Tirh
    0x10537 | EXCLUSION,  // Aghb
    0x11152 | EXCLUSION,  // Mahj
    0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
    0x108F4 | EXCLUSION | RTL,  // Hatr
    0x1160E | EXCLUSION,  // Modi
    0x1128F | EXCLUSION,  // Mult
    0x11AC0 | EXCLUSION,  // Pauc
    0x1158E | EXCLUSION,  // Sidd
    0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
    0x11C0E | EXCLUSION,  // Bhks
    0x11C72 | EXCLUSION,  // Marc
    0x11412 | LIMITED_USE,  // Newa
    0x104B5 | LIMITED_USE | CASED,  // Osge
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
    0x1112 | RECOMMENDED,  // Jamo
    0,
    0x11D10 | EXCLUSION,  // Gonm
    0x11A5C | EXCLUSION,  // Soyo
    0x11A0B | EXCLUSION,  // Zanb
    0x1180B | EXCLUSION,  // Dogr
    0x11D71 | LIMITED_USE,  // Gong
    0x11EE5 | EXCLUSION,  // Maka
    0x16E40 | EXCLUSION | CASED,  // Medf
    0x10D12 | LIMITED_USE | RTL,  // Rohg
    0x10F42 | EXCLUSION | RTL,  // Sogd
    0x10F19 | EXCLUSION | RTL,  // Sogo
    0x10FF1 | EXCLUSION | RTL,  // Elym
    0x1E108 | LIMITED_USE,  // Hmnp
    0x119CE | EXCLUSION,  // Nand
    0x1E2E1 | LIMITED_USE,  // Wcho
    0x10FBF | EXCLUSION | RTL,  // Chrs
    0x1190C | EXCLUSION,  // Diak
    0x18C65 | EXCLUSION | LB_LETTERS,  // Kits
    0x10E88 | EXCLUSION | RTL,  // Yezi
    // End copy-paste from parsescriptmetadata.py
};
243 int32_t getScriptProps(UScriptCode script
) {
244 if (0 <= script
&& script
< UPRV_LENGTHOF(SCRIPT_PROPS
)) {
245 return SCRIPT_PROPS
[script
];
253 U_CAPI
int32_t U_EXPORT2
254 uscript_getSampleString(UScriptCode script
, UChar
*dest
, int32_t capacity
, UErrorCode
*pErrorCode
) {
255 if(U_FAILURE(*pErrorCode
)) { return 0; }
256 if(capacity
< 0 || (capacity
> 0 && dest
== NULL
)) {
257 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
260 int32_t sampleChar
= getScriptProps(script
) & 0x1fffff;
262 if(sampleChar
== 0) {
265 length
= U16_LENGTH(sampleChar
);
266 if(length
<= capacity
) {
268 U16_APPEND_UNSAFE(dest
, i
, sampleChar
);
271 return u_terminateUChars(dest
, capacity
, length
, pErrorCode
);
274 U_COMMON_API
icu::UnicodeString U_EXPORT2
275 uscript_getSampleUnicodeString(UScriptCode script
) {
276 icu::UnicodeString sample
;
277 int32_t sampleChar
= getScriptProps(script
) & 0x1fffff;
278 if(sampleChar
!= 0) {
279 sample
.append(sampleChar
);
284 U_CAPI UScriptUsage U_EXPORT2
285 uscript_getUsage(UScriptCode script
) {
286 return (UScriptUsage
)((getScriptProps(script
) >> 21) & 7);
289 U_CAPI UBool U_EXPORT2
290 uscript_isRightToLeft(UScriptCode script
) {
291 return (getScriptProps(script
) & RTL
) != 0;
294 U_CAPI UBool U_EXPORT2
295 uscript_breaksBetweenLetters(UScriptCode script
) {
296 return (getScriptProps(script
) & LB_LETTERS
) != 0;
299 U_CAPI UBool U_EXPORT2
300 uscript_isCased(UScriptCode script
) {
301 return (getScriptProps(script
) & CASED
) != 0;