]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uscript_props.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / common / uscript_props.cpp
CommitLineData
51004dcb
A
1/*
2*******************************************************************************
2ca993e8 3* Copyright (C) 2013-2015, International Business Machines
51004dcb
A
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: uscript_props.cpp
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2013feb16
12* created by: Markus W. Scherer
13*/
14
15#include "unicode/utypes.h"
16#include "unicode/unistr.h"
17#include "unicode/uscript.h"
18#include "unicode/utf16.h"
19#include "ustr_imp.h"
b331163b 20#include "cmemory.h"
51004dcb
A
21
22namespace {
23
24// Script metadata (script properties).
25// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
26
// Layout of one packed script-properties word (int32_t):
//   whole word == 0 : NOT_ENCODED, no sample character, default false script properties
//   bits 20.. 0     : sample character
//   bits 23..21     : usage
//   bits 31..24     : single-bit flags

// Usage values, stored in bits 23..21.
const int32_t USAGE_SHIFT = 21;
const int32_t UNKNOWN = 1 << USAGE_SHIFT;
const int32_t EXCLUSION = 2 << USAGE_SHIFT;
const int32_t LIMITED_USE = 3 << USAGE_SHIFT;
const int32_t ASPIRATIONAL = 4 << USAGE_SHIFT;
const int32_t RECOMMENDED = 5 << USAGE_SHIFT;

// Single-bit flags, stored in bits 31..24.
const int32_t RTL = 0x01000000;         // bit 24
const int32_t LB_LETTERS = 0x02000000;  // bit 25
const int32_t CASED = 0x04000000;       // bit 26
41
42const int32_t SCRIPT_PROPS[] = {
43 // Begin copy-paste output from
44 // tools/trunk/unicode/py/parsescriptmetadata.py
b331163b 45 0x0040 | RECOMMENDED, // Zyyy
2ca993e8 46 0x0308 | RECOMMENDED, // Zinh
51004dcb
A
47 0x0628 | RECOMMENDED | RTL, // Arab
48 0x0531 | RECOMMENDED | CASED, // Armn
49 0x0995 | RECOMMENDED, // Beng
50 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
2ca993e8 51 0x13C4 | LIMITED_USE | CASED, // Cher
51004dcb
A
52 0x03E2 | EXCLUSION | CASED, // Copt
53 0x042F | RECOMMENDED | CASED, // Cyrl
54 0x10414 | EXCLUSION | CASED, // Dsrt
55 0x0905 | RECOMMENDED, // Deva
56 0x12A0 | RECOMMENDED, // Ethi
57 0x10D3 | RECOMMENDED, // Geor
58 0x10330 | EXCLUSION, // Goth
59 0x03A9 | RECOMMENDED | CASED, // Grek
60 0x0A95 | RECOMMENDED, // Gujr
61 0x0A15 | RECOMMENDED, // Guru
62 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
63 0xAC00 | RECOMMENDED, // Hang
64 0x05D0 | RECOMMENDED | RTL, // Hebr
65 0x304B | RECOMMENDED | LB_LETTERS, // Hira
66 0x0C95 | RECOMMENDED, // Knda
67 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
68 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
69 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
70 0x004C | RECOMMENDED | CASED, // Latn
71 0x0D15 | RECOMMENDED, // Mlym
72 0x1826 | ASPIRATIONAL, // Mong
73 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
74 0x168F | EXCLUSION, // Ogam
2ca993e8 75 0x10308 | EXCLUSION, // Ital
51004dcb
A
76 0x0B15 | RECOMMENDED, // Orya
77 0x16A0 | EXCLUSION, // Runr
78 0x0D85 | RECOMMENDED, // Sinh
79 0x0710 | LIMITED_USE | RTL, // Syrc
80 0x0B95 | RECOMMENDED, // Taml
81 0x0C15 | RECOMMENDED, // Telu
82 0x078C | RECOMMENDED | RTL, // Thaa
83 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
84 0x0F40 | RECOMMENDED, // Tibt
85 0x14C0 | ASPIRATIONAL, // Cans
86 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii
87 0x1703 | EXCLUSION, // Tglg
88 0x1723 | EXCLUSION, // Hano
89 0x1743 | EXCLUSION, // Buhd
90 0x1763 | EXCLUSION, // Tagb
2ca993e8 91 0x280E | UNKNOWN, // Brai
51004dcb
A
92 0x10800 | EXCLUSION | RTL, // Cprt
93 0x1900 | LIMITED_USE, // Limb
94 0x10000 | EXCLUSION, // Linb
95 0x10480 | EXCLUSION, // Osma
96 0x10450 | EXCLUSION, // Shaw
97 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
98 0x10380 | EXCLUSION, // Ugar
99 0,
100 0x1A00 | EXCLUSION, // Bugi
101 0x2C00 | EXCLUSION | CASED, // Glag
102 0x10A00 | EXCLUSION | RTL, // Khar
103 0xA800 | LIMITED_USE, // Sylo
104 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
2ca993e8 105 0x2D5E | ASPIRATIONAL, // Tfng
51004dcb 106 0x103A0 | EXCLUSION, // Xpeo
b331163b 107 0x1B05 | LIMITED_USE, // Bali
51004dcb
A
108 0x1BC0 | LIMITED_USE, // Batk
109 0,
110 0x11005 | EXCLUSION, // Brah
111 0xAA00 | LIMITED_USE, // Cham
112 0,
113 0,
114 0,
115 0,
116 0x13153 | EXCLUSION, // Egyp
117 0,
118 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
119 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
b331163b 120 0x16B1C | EXCLUSION, // Hmng
2ca993e8 121 0x10CA1 | EXCLUSION | RTL | CASED, // Hung
51004dcb 122 0,
b331163b 123 0xA984 | LIMITED_USE, // Java
51004dcb
A
124 0xA90A | LIMITED_USE, // Kali
125 0,
126 0,
127 0x1C00 | LIMITED_USE, // Lepc
b331163b 128 0x10647 | EXCLUSION, // Lina
51004dcb
A
129 0x0840 | LIMITED_USE | RTL, // Mand
130 0,
131 0x10980 | EXCLUSION | RTL, // Mero
2ca993e8 132 0x07D8 | LIMITED_USE | RTL, // Nkoo
51004dcb 133 0x10C00 | EXCLUSION | RTL, // Orkh
b331163b 134 0x1036B | EXCLUSION, // Perm
51004dcb
A
135 0xA840 | EXCLUSION, // Phag
136 0x10900 | EXCLUSION | RTL, // Phnx
137 0x16F00 | ASPIRATIONAL, // Plrd
138 0,
139 0,
140 0,
141 0,
142 0,
143 0,
144 0xA549 | LIMITED_USE, // Vaii
145 0,
146 0x12000 | EXCLUSION, // Xsux
147 0,
148 0xFDD0 | UNKNOWN, // Zzzz
2ca993e8 149 0x102B7 | EXCLUSION, // Cari
51004dcb
A
150 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
151 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
152 0x10280 | EXCLUSION, // Lyci
153 0x10920 | EXCLUSION | RTL, // Lydi
154 0x1C5A | LIMITED_USE, // Olck
155 0xA930 | EXCLUSION, // Rjng
156 0xA882 | LIMITED_USE, // Saur
2ca993e8 157 0x1D850 | EXCLUSION, // Sgnw
51004dcb
A
158 0x1B83 | LIMITED_USE, // Sund
159 0,
160 0xABC0 | LIMITED_USE, // Mtei
161 0x10840 | EXCLUSION | RTL, // Armi
162 0x10B00 | EXCLUSION | RTL, // Avst
163 0x11103 | LIMITED_USE, // Cakm
164 0xAC00 | RECOMMENDED, // Kore
165 0x11083 | EXCLUSION, // Kthi
2ca993e8 166 0x10AC1 | EXCLUSION | RTL, // Mani
51004dcb 167 0x10B60 | EXCLUSION | RTL, // Phli
b331163b 168 0x10B8F | EXCLUSION | RTL, // Phlp
51004dcb
A
169 0,
170 0x10B40 | EXCLUSION | RTL, // Prti
171 0x0800 | EXCLUSION | RTL, // Samr
172 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
173 0,
174 0,
175 0xA6A0 | LIMITED_USE, // Bamu
2ca993e8 176 0xA4E8 | LIMITED_USE, // Lisu
51004dcb
A
177 0,
178 0x10A60 | EXCLUSION | RTL, // Sarb
b331163b
A
179 0x16AE6 | EXCLUSION, // Bass
180 0x1BC20 | EXCLUSION, // Dupl
181 0x10500 | EXCLUSION, // Elba
182 0x11315 | EXCLUSION, // Gran
51004dcb
A
183 0,
184 0,
b331163b 185 0x1E802 | EXCLUSION | RTL, // Mend
51004dcb 186 0x109A0 | EXCLUSION | RTL, // Merc
b331163b
A
187 0x10A95 | EXCLUSION | RTL, // Narb
188 0x10896 | EXCLUSION | RTL, // Nbat
189 0x10873 | EXCLUSION | RTL, // Palm
190 0x112BE | EXCLUSION, // Sind
191 0x118B4 | EXCLUSION | CASED, // Wara
51004dcb
A
192 0,
193 0,
b331163b 194 0x16A4F | EXCLUSION, // Mroo
51004dcb
A
195 0,
196 0x11183 | EXCLUSION, // Shrd
197 0x110D0 | EXCLUSION, // Sora
198 0x11680 | EXCLUSION, // Takr
199 0,
200 0,
2ca993e8 201 0x14400 | EXCLUSION, // Hluw
b331163b
A
202 0x11208 | EXCLUSION, // Khoj
203 0x11484 | EXCLUSION, // Tirh
204 0x10537 | EXCLUSION, // Aghb
205 0x11152 | EXCLUSION, // Mahj
2ca993e8
A
206 0x11717 | EXCLUSION | LB_LETTERS, // Ahom
207 0x108F4 | EXCLUSION | RTL, // Hatr
b331163b 208 0x1160E | EXCLUSION, // Modi
2ca993e8 209 0x1128F | EXCLUSION, // Mult
b331163b
A
210 0x11AC0 | EXCLUSION, // Pauc
211 0x1158E | EXCLUSION, // Sidd
51004dcb
A
212 // End copy-paste from parsescriptmetadata.py
213};
214
215int32_t getScriptProps(UScriptCode script) {
b331163b 216 if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) {
51004dcb
A
217 return SCRIPT_PROPS[script];
218 } else {
219 return 0;
220 }
221}
222
223} // namespace
224
225U_CAPI int32_t U_EXPORT2
226uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
227 if(U_FAILURE(*pErrorCode)) { return 0; }
228 if(capacity < 0 || (capacity > 0 && dest == NULL)) {
229 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
230 return 0;
231 }
232 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
233 int32_t length;
234 if(sampleChar == 0) {
235 length = 0;
236 } else {
237 length = U16_LENGTH(sampleChar);
238 if(length <= capacity) {
239 int32_t i = 0;
240 U16_APPEND_UNSAFE(dest, i, sampleChar);
241 }
242 }
243 return u_terminateUChars(dest, capacity, length, pErrorCode);
244}
245
246U_COMMON_API icu::UnicodeString U_EXPORT2
247uscript_getSampleUnicodeString(UScriptCode script) {
248 icu::UnicodeString sample;
249 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
250 if(sampleChar != 0) {
251 sample.append(sampleChar);
252 }
253 return sample;
254}
255
256U_CAPI UScriptUsage U_EXPORT2
257uscript_getUsage(UScriptCode script) {
258 return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
259}
260
261U_CAPI UBool U_EXPORT2
262uscript_isRightToLeft(UScriptCode script) {
263 return (getScriptProps(script) & RTL) != 0;
264}
265
266U_CAPI UBool U_EXPORT2
267uscript_breaksBetweenLetters(UScriptCode script) {
268 return (getScriptProps(script) & LB_LETTERS) != 0;
269}
270
271U_CAPI UBool U_EXPORT2
272uscript_isCased(UScriptCode script) {
273 return (getScriptProps(script) & CASED) != 0;
274}