/*
*******************************************************************************
* Copyright (C) 2013, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* file name:  uscript_props.cpp
* tab size:   8 (not used)
*
* created on: 2013feb16
* created by: Markus W. Scherer
*/
15 #include "unicode/utypes.h"
16 #include "unicode/unistr.h"
17 #include "unicode/uscript.h"
18 #include "unicode/utf16.h"
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so that the macro behaves as a single
// primary expression wherever it is used.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
// Script metadata (script properties).
// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt

// Each script's properties are packed into one int32_t:
// 0 = NOT_ENCODED, no sample character, default false script properties.
// Bits 20.. 0: sample character

// Bits 23..21: usage
// (the 3-bit field that uscript_getUsage() extracts with ">> 21 & 7")
const int32_t UNKNOWN = 1 << 21;
const int32_t EXCLUSION = 2 << 21;
const int32_t LIMITED_USE = 3 << 21;
const int32_t ASPIRATIONAL = 4 << 21;
const int32_t RECOMMENDED = 5 << 21;
// Bits 31..24: Single-bit flags
// Each flag occupies its own bit so that it can be tested with a plain
// bitwise AND against a packed script-properties value.
const int32_t RTL = 1 << 24;
const int32_t LB_LETTERS = 1 << 25;
const int32_t CASED = 1 << 26;
43 const int32_t SCRIPT_PROPS
[] = {
44 // Begin copy-paste output from
45 // tools/trunk/unicode/py/parsescriptmetadata.py
46 0x0040 | UNKNOWN
, // Zyyy
47 0x0308 | UNKNOWN
, // Zinh
48 0x0628 | RECOMMENDED
| RTL
, // Arab
49 0x0531 | RECOMMENDED
| CASED
, // Armn
50 0x0995 | RECOMMENDED
, // Beng
51 0x3105 | RECOMMENDED
| LB_LETTERS
, // Bopo
52 0x13C4 | LIMITED_USE
, // Cher
53 0x03E2 | EXCLUSION
| CASED
, // Copt
54 0x042F | RECOMMENDED
| CASED
, // Cyrl
55 0x10414 | EXCLUSION
| CASED
, // Dsrt
56 0x0905 | RECOMMENDED
, // Deva
57 0x12A0 | RECOMMENDED
, // Ethi
58 0x10D3 | RECOMMENDED
, // Geor
59 0x10330 | EXCLUSION
, // Goth
60 0x03A9 | RECOMMENDED
| CASED
, // Grek
61 0x0A95 | RECOMMENDED
, // Gujr
62 0x0A15 | RECOMMENDED
, // Guru
63 0x5B57 | RECOMMENDED
| LB_LETTERS
, // Hani
64 0xAC00 | RECOMMENDED
, // Hang
65 0x05D0 | RECOMMENDED
| RTL
, // Hebr
66 0x304B | RECOMMENDED
| LB_LETTERS
, // Hira
67 0x0C95 | RECOMMENDED
, // Knda
68 0x30AB | RECOMMENDED
| LB_LETTERS
, // Kana
69 0x1780 | RECOMMENDED
| LB_LETTERS
, // Khmr
70 0x0EA5 | RECOMMENDED
| LB_LETTERS
, // Laoo
71 0x004C | RECOMMENDED
| CASED
, // Latn
72 0x0D15 | RECOMMENDED
, // Mlym
73 0x1826 | ASPIRATIONAL
, // Mong
74 0x1000 | RECOMMENDED
| LB_LETTERS
, // Mymr
75 0x168F | EXCLUSION
, // Ogam
76 0x10300 | EXCLUSION
, // Ital
77 0x0B15 | RECOMMENDED
, // Orya
78 0x16A0 | EXCLUSION
, // Runr
79 0x0D85 | RECOMMENDED
, // Sinh
80 0x0710 | LIMITED_USE
| RTL
, // Syrc
81 0x0B95 | RECOMMENDED
, // Taml
82 0x0C15 | RECOMMENDED
, // Telu
83 0x078C | RECOMMENDED
| RTL
, // Thaa
84 0x0E17 | RECOMMENDED
| LB_LETTERS
, // Thai
85 0x0F40 | RECOMMENDED
, // Tibt
86 0x14C0 | ASPIRATIONAL
, // Cans
87 0xA288 | ASPIRATIONAL
| LB_LETTERS
, // Yiii
88 0x1703 | EXCLUSION
, // Tglg
89 0x1723 | EXCLUSION
, // Hano
90 0x1743 | EXCLUSION
, // Buhd
91 0x1763 | EXCLUSION
, // Tagb
92 0x2800 | UNKNOWN
, // Brai
93 0x10800 | EXCLUSION
| RTL
, // Cprt
94 0x1900 | LIMITED_USE
, // Limb
95 0x10000 | EXCLUSION
, // Linb
96 0x10480 | EXCLUSION
, // Osma
97 0x10450 | EXCLUSION
, // Shaw
98 0x1950 | LIMITED_USE
| LB_LETTERS
, // Tale
99 0x10380 | EXCLUSION
, // Ugar
101 0x1A00 | EXCLUSION
, // Bugi
102 0x2C00 | EXCLUSION
| CASED
, // Glag
103 0x10A00 | EXCLUSION
| RTL
, // Khar
104 0xA800 | LIMITED_USE
, // Sylo
105 0x1980 | LIMITED_USE
| LB_LETTERS
, // Talu
106 0x2D30 | ASPIRATIONAL
, // Tfng
107 0x103A0 | EXCLUSION
, // Xpeo
108 0x1B05 | LIMITED_USE
| LB_LETTERS
, // Bali
109 0x1BC0 | LIMITED_USE
, // Batk
111 0x11005 | EXCLUSION
, // Brah
112 0xAA00 | LIMITED_USE
, // Cham
117 0x13153 | EXCLUSION
, // Egyp
119 0x5B57 | RECOMMENDED
| LB_LETTERS
, // Hans
120 0x5B57 | RECOMMENDED
| LB_LETTERS
, // Hant
124 0xA984 | LIMITED_USE
| LB_LETTERS
, // Java
125 0xA90A | LIMITED_USE
, // Kali
128 0x1C00 | LIMITED_USE
, // Lepc
130 0x0840 | LIMITED_USE
| RTL
, // Mand
132 0x10980 | EXCLUSION
| RTL
, // Mero
133 0x07CA | LIMITED_USE
| RTL
, // Nkoo
134 0x10C00 | EXCLUSION
| RTL
, // Orkh
136 0xA840 | EXCLUSION
, // Phag
137 0x10900 | EXCLUSION
| RTL
, // Phnx
138 0x16F00 | ASPIRATIONAL
, // Plrd
145 0xA549 | LIMITED_USE
, // Vaii
147 0x12000 | EXCLUSION
, // Xsux
149 0xFDD0 | UNKNOWN
, // Zzzz
150 0x102A0 | EXCLUSION
, // Cari
151 0x304B | RECOMMENDED
| LB_LETTERS
, // Jpan
152 0x1A20 | LIMITED_USE
| LB_LETTERS
, // Lana
153 0x10280 | EXCLUSION
, // Lyci
154 0x10920 | EXCLUSION
| RTL
, // Lydi
155 0x1C5A | LIMITED_USE
, // Olck
156 0xA930 | EXCLUSION
, // Rjng
157 0xA882 | LIMITED_USE
, // Saur
159 0x1B83 | LIMITED_USE
, // Sund
161 0xABC0 | LIMITED_USE
, // Mtei
162 0x10840 | EXCLUSION
| RTL
, // Armi
163 0x10B00 | EXCLUSION
| RTL
, // Avst
164 0x11103 | LIMITED_USE
, // Cakm
165 0xAC00 | RECOMMENDED
, // Kore
166 0x11083 | EXCLUSION
, // Kthi
168 0x10B60 | EXCLUSION
| RTL
, // Phli
171 0x10B40 | EXCLUSION
| RTL
, // Prti
172 0x0800 | EXCLUSION
| RTL
, // Samr
173 0xAA80 | LIMITED_USE
| LB_LETTERS
, // Tavt
176 0xA6A0 | LIMITED_USE
, // Bamu
177 0xA4D0 | LIMITED_USE
, // Lisu
179 0x10A60 | EXCLUSION
| RTL
, // Sarb
187 0x109A0 | EXCLUSION
| RTL
, // Merc
197 0x11183 | EXCLUSION
, // Shrd
198 0x110D0 | EXCLUSION
, // Sora
199 0x11680 | EXCLUSION
, // Takr
205 // End copy-paste from parsescriptmetadata.py
208 int32_t getScriptProps(UScriptCode script
) {
209 if (0 <= script
&& script
< LENGTHOF(SCRIPT_PROPS
)) {
210 return SCRIPT_PROPS
[script
];
218 U_CAPI
int32_t U_EXPORT2
219 uscript_getSampleString(UScriptCode script
, UChar
*dest
, int32_t capacity
, UErrorCode
*pErrorCode
) {
220 if(U_FAILURE(*pErrorCode
)) { return 0; }
221 if(capacity
< 0 || (capacity
> 0 && dest
== NULL
)) {
222 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
225 int32_t sampleChar
= getScriptProps(script
) & 0x1fffff;
227 if(sampleChar
== 0) {
230 length
= U16_LENGTH(sampleChar
);
231 if(length
<= capacity
) {
233 U16_APPEND_UNSAFE(dest
, i
, sampleChar
);
236 return u_terminateUChars(dest
, capacity
, length
, pErrorCode
);
239 U_COMMON_API
icu::UnicodeString U_EXPORT2
240 uscript_getSampleUnicodeString(UScriptCode script
) {
241 icu::UnicodeString sample
;
242 int32_t sampleChar
= getScriptProps(script
) & 0x1fffff;
243 if(sampleChar
!= 0) {
244 sample
.append(sampleChar
);
249 U_CAPI UScriptUsage U_EXPORT2
250 uscript_getUsage(UScriptCode script
) {
251 return (UScriptUsage
)((getScriptProps(script
) >> 21) & 7);
254 U_CAPI UBool U_EXPORT2
255 uscript_isRightToLeft(UScriptCode script
) {
256 return (getScriptProps(script
) & RTL
) != 0;
259 U_CAPI UBool U_EXPORT2
260 uscript_breaksBetweenLetters(UScriptCode script
) {
261 return (getScriptProps(script
) & LB_LETTERS
) != 0;
264 U_CAPI UBool U_EXPORT2
265 uscript_isCased(UScriptCode script
) {
266 return (getScriptProps(script
) & CASED
) != 0;