]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ********************************************************************** | |
3 | * Copyright (C) 1997-2015, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ********************************************************************** | |
6 | * | |
7 | * File USCRIPT.H | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 07/06/2001 Ram Creation. | |
13 | ****************************************************************************** | |
14 | */ | |
15 | ||
16 | #ifndef USCRIPT_H | |
17 | #define USCRIPT_H | |
18 | #include "unicode/utypes.h" | |
19 | ||
20 | /** | |
21 | * \file | |
22 | * \brief C API: Unicode Script Information | |
23 | */ | |
24 | ||
25 | /** | |
26 | * Constants for ISO 15924 script codes. | |
27 | * | |
28 | * The current set of script code constants supports at least all scripts | |
29 | * that are encoded in the version of Unicode which ICU currently supports. | |
30 | * The names of the constants are usually derived from the | |
31 | * Unicode script property value aliases. | |
32 | * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) | |
33 | * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . | |
34 | * | |
35 | * Starting with ICU 3.6, constants for most ISO 15924 script codes | |
36 | * are included, for use with language tags, CLDR data, and similar. | |
37 | * Some of those codes are not used in the Unicode Character Database (UCD). | |
38 | * For example, there are no characters that have a UCD script property value of | |
39 | * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. | |
40 | * | |
41 | * Private-use codes Qaaa..Qabx are not included. | |
42 | * | |
43 | * Starting with ICU 55, script codes are only added when their scripts | |
44 | * have been or will certainly be encoded in Unicode, | |
45 | * and have been assigned Unicode script property value aliases, | |
46 | * to ensure that their script names are stable and match the names of the constants. | |
47 | * Script codes like Latf and Aran that are not subject to separate encoding | |
48 | * may be added at any time. | |
49 | * | |
50 | * @stable ICU 2.2 | |
51 | */ | |
52 | typedef enum UScriptCode { | |
53 | /* | |
54 | * Note: UScriptCode constants and their ISO script code comments | |
55 | * are parsed by preparseucd.py. | |
56 | * It matches lines like | |
57 | * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * / | |
58 | */ | |
59 | ||
60 | /** @stable ICU 2.2 */ | |
61 | USCRIPT_INVALID_CODE = -1, | |
62 | /** @stable ICU 2.2 */ | |
63 | USCRIPT_COMMON = 0, /* Zyyy */ | |
64 | /** @stable ICU 2.2 */ | |
65 | USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ | |
66 | /** @stable ICU 2.2 */ | |
67 | USCRIPT_ARABIC = 2, /* Arab */ | |
68 | /** @stable ICU 2.2 */ | |
69 | USCRIPT_ARMENIAN = 3, /* Armn */ | |
70 | /** @stable ICU 2.2 */ | |
71 | USCRIPT_BENGALI = 4, /* Beng */ | |
72 | /** @stable ICU 2.2 */ | |
73 | USCRIPT_BOPOMOFO = 5, /* Bopo */ | |
74 | /** @stable ICU 2.2 */ | |
75 | USCRIPT_CHEROKEE = 6, /* Cher */ | |
76 | /** @stable ICU 2.2 */ | |
77 | USCRIPT_COPTIC = 7, /* Copt */ | |
78 | /** @stable ICU 2.2 */ | |
79 | USCRIPT_CYRILLIC = 8, /* Cyrl */ | |
80 | /** @stable ICU 2.2 */ | |
81 | USCRIPT_DESERET = 9, /* Dsrt */ | |
82 | /** @stable ICU 2.2 */ | |
83 | USCRIPT_DEVANAGARI = 10, /* Deva */ | |
84 | /** @stable ICU 2.2 */ | |
85 | USCRIPT_ETHIOPIC = 11, /* Ethi */ | |
86 | /** @stable ICU 2.2 */ | |
87 | USCRIPT_GEORGIAN = 12, /* Geor */ | |
88 | /** @stable ICU 2.2 */ | |
89 | USCRIPT_GOTHIC = 13, /* Goth */ | |
90 | /** @stable ICU 2.2 */ | |
91 | USCRIPT_GREEK = 14, /* Grek */ | |
92 | /** @stable ICU 2.2 */ | |
93 | USCRIPT_GUJARATI = 15, /* Gujr */ | |
94 | /** @stable ICU 2.2 */ | |
95 | USCRIPT_GURMUKHI = 16, /* Guru */ | |
96 | /** @stable ICU 2.2 */ | |
97 | USCRIPT_HAN = 17, /* Hani */ | |
98 | /** @stable ICU 2.2 */ | |
99 | USCRIPT_HANGUL = 18, /* Hang */ | |
100 | /** @stable ICU 2.2 */ | |
101 | USCRIPT_HEBREW = 19, /* Hebr */ | |
102 | /** @stable ICU 2.2 */ | |
103 | USCRIPT_HIRAGANA = 20, /* Hira */ | |
104 | /** @stable ICU 2.2 */ | |
105 | USCRIPT_KANNADA = 21, /* Knda */ | |
106 | /** @stable ICU 2.2 */ | |
107 | USCRIPT_KATAKANA = 22, /* Kana */ | |
108 | /** @stable ICU 2.2 */ | |
109 | USCRIPT_KHMER = 23, /* Khmr */ | |
110 | /** @stable ICU 2.2 */ | |
111 | USCRIPT_LAO = 24, /* Laoo */ | |
112 | /** @stable ICU 2.2 */ | |
113 | USCRIPT_LATIN = 25, /* Latn */ | |
114 | /** @stable ICU 2.2 */ | |
115 | USCRIPT_MALAYALAM = 26, /* Mlym */ | |
116 | /** @stable ICU 2.2 */ | |
117 | USCRIPT_MONGOLIAN = 27, /* Mong */ | |
118 | /** @stable ICU 2.2 */ | |
119 | USCRIPT_MYANMAR = 28, /* Mymr */ | |
120 | /** @stable ICU 2.2 */ | |
121 | USCRIPT_OGHAM = 29, /* Ogam */ | |
122 | /** @stable ICU 2.2 */ | |
123 | USCRIPT_OLD_ITALIC = 30, /* Ital */ | |
124 | /** @stable ICU 2.2 */ | |
125 | USCRIPT_ORIYA = 31, /* Orya */ | |
126 | /** @stable ICU 2.2 */ | |
127 | USCRIPT_RUNIC = 32, /* Runr */ | |
128 | /** @stable ICU 2.2 */ | |
129 | USCRIPT_SINHALA = 33, /* Sinh */ | |
130 | /** @stable ICU 2.2 */ | |
131 | USCRIPT_SYRIAC = 34, /* Syrc */ | |
132 | /** @stable ICU 2.2 */ | |
133 | USCRIPT_TAMIL = 35, /* Taml */ | |
134 | /** @stable ICU 2.2 */ | |
135 | USCRIPT_TELUGU = 36, /* Telu */ | |
136 | /** @stable ICU 2.2 */ | |
137 | USCRIPT_THAANA = 37, /* Thaa */ | |
138 | /** @stable ICU 2.2 */ | |
139 | USCRIPT_THAI = 38, /* Thai */ | |
140 | /** @stable ICU 2.2 */ | |
141 | USCRIPT_TIBETAN = 39, /* Tibt */ | |
142 | /** Canadian_Aboriginal script. @stable ICU 2.6 */ | |
143 | USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ | |
144 | /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ | |
145 | USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, | |
146 | /** @stable ICU 2.2 */ | |
147 | USCRIPT_YI = 41, /* Yiii */ | |
148 | /* New scripts in Unicode 3.2 */ | |
149 | /** @stable ICU 2.2 */ | |
150 | USCRIPT_TAGALOG = 42, /* Tglg */ | |
151 | /** @stable ICU 2.2 */ | |
152 | USCRIPT_HANUNOO = 43, /* Hano */ | |
153 | /** @stable ICU 2.2 */ | |
154 | USCRIPT_BUHID = 44, /* Buhd */ | |
155 | /** @stable ICU 2.2 */ | |
156 | USCRIPT_TAGBANWA = 45, /* Tagb */ | |
157 | ||
158 | /* New scripts in Unicode 4 */ | |
159 | /** @stable ICU 2.6 */ | |
160 | USCRIPT_BRAILLE = 46, /* Brai */ | |
161 | /** @stable ICU 2.6 */ | |
162 | USCRIPT_CYPRIOT = 47, /* Cprt */ | |
163 | /** @stable ICU 2.6 */ | |
164 | USCRIPT_LIMBU = 48, /* Limb */ | |
165 | /** @stable ICU 2.6 */ | |
166 | USCRIPT_LINEAR_B = 49, /* Linb */ | |
167 | /** @stable ICU 2.6 */ | |
168 | USCRIPT_OSMANYA = 50, /* Osma */ | |
169 | /** @stable ICU 2.6 */ | |
170 | USCRIPT_SHAVIAN = 51, /* Shaw */ | |
171 | /** @stable ICU 2.6 */ | |
172 | USCRIPT_TAI_LE = 52, /* Tale */ | |
173 | /** @stable ICU 2.6 */ | |
174 | USCRIPT_UGARITIC = 53, /* Ugar */ | |
175 | ||
176 | /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ | |
177 | USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ | |
178 | ||
179 | /* New scripts in Unicode 4.1 */ | |
180 | /** @stable ICU 3.4 */ | |
181 | USCRIPT_BUGINESE = 55, /* Bugi */ | |
182 | /** @stable ICU 3.4 */ | |
183 | USCRIPT_GLAGOLITIC = 56, /* Glag */ | |
184 | /** @stable ICU 3.4 */ | |
185 | USCRIPT_KHAROSHTHI = 57, /* Khar */ | |
186 | /** @stable ICU 3.4 */ | |
187 | USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ | |
188 | /** @stable ICU 3.4 */ | |
189 | USCRIPT_NEW_TAI_LUE = 59, /* Talu */ | |
190 | /** @stable ICU 3.4 */ | |
191 | USCRIPT_TIFINAGH = 60, /* Tfng */ | |
192 | /** @stable ICU 3.4 */ | |
193 | USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ | |
194 | ||
195 | /* New script codes from Unicode and ISO 15924 */ | |
196 | /** @stable ICU 3.6 */ | |
197 | USCRIPT_BALINESE = 62, /* Bali */ | |
198 | /** @stable ICU 3.6 */ | |
199 | USCRIPT_BATAK = 63, /* Batk */ | |
200 | /** @stable ICU 3.6 */ | |
201 | USCRIPT_BLISSYMBOLS = 64, /* Blis */ | |
202 | /** @stable ICU 3.6 */ | |
203 | USCRIPT_BRAHMI = 65, /* Brah */ | |
204 | /** @stable ICU 3.6 */ | |
205 | USCRIPT_CHAM = 66, /* Cham */ | |
206 | /** @stable ICU 3.6 */ | |
207 | USCRIPT_CIRTH = 67, /* Cirt */ | |
208 | /** @stable ICU 3.6 */ | |
209 | USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ | |
210 | /** @stable ICU 3.6 */ | |
211 | USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ | |
212 | /** @stable ICU 3.6 */ | |
213 | USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ | |
214 | /** @stable ICU 3.6 */ | |
215 | USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ | |
216 | /** @stable ICU 3.6 */ | |
217 | USCRIPT_KHUTSURI = 72, /* Geok */ | |
218 | /** @stable ICU 3.6 */ | |
219 | USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ | |
220 | /** @stable ICU 3.6 */ | |
221 | USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ | |
222 | /** @stable ICU 3.6 */ | |
223 | USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ | |
224 | /** @stable ICU 3.6 */ | |
225 | USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ | |
226 | /** @stable ICU 3.6 */ | |
227 | USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ | |
228 | /** @stable ICU 3.6 */ | |
229 | USCRIPT_JAVANESE = 78, /* Java */ | |
230 | /** @stable ICU 3.6 */ | |
231 | USCRIPT_KAYAH_LI = 79, /* Kali */ | |
232 | /** @stable ICU 3.6 */ | |
233 | USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ | |
234 | /** @stable ICU 3.6 */ | |
235 | USCRIPT_LATIN_GAELIC = 81, /* Latg */ | |
236 | /** @stable ICU 3.6 */ | |
237 | USCRIPT_LEPCHA = 82, /* Lepc */ | |
238 | /** @stable ICU 3.6 */ | |
239 | USCRIPT_LINEAR_A = 83, /* Lina */ | |
240 | /** @stable ICU 4.6 */ | |
241 | USCRIPT_MANDAIC = 84, /* Mand */ | |
242 | /** @stable ICU 3.6 */ | |
243 | USCRIPT_MANDAEAN = USCRIPT_MANDAIC, | |
244 | /** @stable ICU 3.6 */ | |
245 | USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ | |
246 | /** @stable ICU 4.6 */ | |
247 | USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ | |
248 | /** @stable ICU 3.6 */ | |
249 | USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, | |
250 | /** @stable ICU 3.6 */ | |
251 | USCRIPT_NKO = 87, /* Nkoo */ | |
252 | /** @stable ICU 3.6 */ | |
253 | USCRIPT_ORKHON = 88, /* Orkh */ | |
254 | /** @stable ICU 3.6 */ | |
255 | USCRIPT_OLD_PERMIC = 89, /* Perm */ | |
256 | /** @stable ICU 3.6 */ | |
257 | USCRIPT_PHAGS_PA = 90, /* Phag */ | |
258 | /** @stable ICU 3.6 */ | |
259 | USCRIPT_PHOENICIAN = 91, /* Phnx */ | |
260 | /** @stable ICU 52 */ | |
261 | USCRIPT_MIAO = 92, /* Plrd */ | |
262 | /** @stable ICU 3.6 */ | |
263 | USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, | |
264 | /** @stable ICU 3.6 */ | |
265 | USCRIPT_RONGORONGO = 93, /* Roro */ | |
266 | /** @stable ICU 3.6 */ | |
267 | USCRIPT_SARATI = 94, /* Sara */ | |
268 | /** @stable ICU 3.6 */ | |
269 | USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ | |
270 | /** @stable ICU 3.6 */ | |
271 | USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ | |
272 | /** @stable ICU 3.6 */ | |
273 | USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ | |
274 | /** @stable ICU 3.6 */ | |
275 | USCRIPT_TENGWAR = 98, /* Teng */ | |
276 | /** @stable ICU 3.6 */ | |
277 | USCRIPT_VAI = 99, /* Vaii */ | |
278 | /** @stable ICU 3.6 */ | |
279 | USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ | |
280 | /** @stable ICU 3.6 */ | |
281 | USCRIPT_CUNEIFORM = 101,/* Xsux */ | |
282 | /** @stable ICU 3.6 */ | |
283 | USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ | |
284 | /** @stable ICU 3.6 */ | |
285 | USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ | |
286 | ||
287 | /** @stable ICU 3.8 */ | |
288 | USCRIPT_CARIAN = 104,/* Cari */ | |
289 | /** @stable ICU 3.8 */ | |
290 | USCRIPT_JAPANESE = 105,/* Jpan */ | |
291 | /** @stable ICU 3.8 */ | |
292 | USCRIPT_LANNA = 106,/* Lana */ | |
293 | /** @stable ICU 3.8 */ | |
294 | USCRIPT_LYCIAN = 107,/* Lyci */ | |
295 | /** @stable ICU 3.8 */ | |
296 | USCRIPT_LYDIAN = 108,/* Lydi */ | |
297 | /** @stable ICU 3.8 */ | |
298 | USCRIPT_OL_CHIKI = 109,/* Olck */ | |
299 | /** @stable ICU 3.8 */ | |
300 | USCRIPT_REJANG = 110,/* Rjng */ | |
301 | /** @stable ICU 3.8 */ | |
302 | USCRIPT_SAURASHTRA = 111,/* Saur */ | |
303 | /** Sutton SignWriting @stable ICU 3.8 */ | |
304 | USCRIPT_SIGN_WRITING = 112,/* Sgnw */ | |
305 | /** @stable ICU 3.8 */ | |
306 | USCRIPT_SUNDANESE = 113,/* Sund */ | |
307 | /** @stable ICU 3.8 */ | |
308 | USCRIPT_MOON = 114,/* Moon */ | |
309 | /** @stable ICU 3.8 */ | |
310 | USCRIPT_MEITEI_MAYEK = 115,/* Mtei */ | |
311 | ||
312 | /** @stable ICU 4.0 */ | |
313 | USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ | |
314 | /** @stable ICU 4.0 */ | |
315 | USCRIPT_AVESTAN = 117,/* Avst */ | |
316 | /** @stable ICU 4.0 */ | |
317 | USCRIPT_CHAKMA = 118,/* Cakm */ | |
318 | /** @stable ICU 4.0 */ | |
319 | USCRIPT_KOREAN = 119,/* Kore */ | |
320 | /** @stable ICU 4.0 */ | |
321 | USCRIPT_KAITHI = 120,/* Kthi */ | |
322 | /** @stable ICU 4.0 */ | |
323 | USCRIPT_MANICHAEAN = 121,/* Mani */ | |
324 | /** @stable ICU 4.0 */ | |
325 | USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ | |
326 | /** @stable ICU 4.0 */ | |
327 | USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ | |
328 | /** @stable ICU 4.0 */ | |
329 | USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ | |
330 | /** @stable ICU 4.0 */ | |
331 | USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ | |
332 | /** @stable ICU 4.0 */ | |
333 | USCRIPT_SAMARITAN = 126,/* Samr */ | |
334 | /** @stable ICU 4.0 */ | |
335 | USCRIPT_TAI_VIET = 127,/* Tavt */ | |
336 | /** @stable ICU 4.0 */ | |
337 | USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ | |
338 | /** @stable ICU 4.0 */ | |
339 | USCRIPT_SYMBOLS = 129,/* Zsym */ | |
340 | ||
341 | /** @stable ICU 4.4 */ | |
342 | USCRIPT_BAMUM = 130,/* Bamu */ | |
343 | /** @stable ICU 4.4 */ | |
344 | USCRIPT_LISU = 131,/* Lisu */ | |
345 | /** @stable ICU 4.4 */ | |
346 | USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ | |
347 | /** @stable ICU 4.4 */ | |
348 | USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ | |
349 | ||
350 | /** @stable ICU 4.6 */ | |
351 | USCRIPT_BASSA_VAH = 134,/* Bass */ | |
352 | /** @stable ICU 54 */ | |
353 | USCRIPT_DUPLOYAN = 135,/* Dupl */ | |
354 | #ifndef U_HIDE_DEPRECATED_API | |
355 | /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */ | |
356 | USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN, | |
357 | #endif /* U_HIDE_DEPRECATED_API */ | |
358 | /** @stable ICU 4.6 */ | |
359 | USCRIPT_ELBASAN = 136,/* Elba */ | |
360 | /** @stable ICU 4.6 */ | |
361 | USCRIPT_GRANTHA = 137,/* Gran */ | |
362 | /** @stable ICU 4.6 */ | |
363 | USCRIPT_KPELLE = 138,/* Kpel */ | |
364 | /** @stable ICU 4.6 */ | |
365 | USCRIPT_LOMA = 139,/* Loma */ | |
366 | /** Mende Kikakui @stable ICU 4.6 */ | |
367 | USCRIPT_MENDE = 140,/* Mend */ | |
368 | /** @stable ICU 4.6 */ | |
369 | USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ | |
370 | /** @stable ICU 4.6 */ | |
371 | USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ | |
372 | /** @stable ICU 4.6 */ | |
373 | USCRIPT_NABATAEAN = 143,/* Nbat */ | |
374 | /** @stable ICU 4.6 */ | |
375 | USCRIPT_PALMYRENE = 144,/* Palm */ | |
376 | /** @stable ICU 54 */ | |
377 | USCRIPT_KHUDAWADI = 145,/* Sind */ | |
378 | /** @stable ICU 4.6 */ | |
379 | USCRIPT_SINDHI = USCRIPT_KHUDAWADI, | |
380 | /** @stable ICU 4.6 */ | |
381 | USCRIPT_WARANG_CITI = 146,/* Wara */ | |
382 | ||
383 | /** @stable ICU 4.8 */ | |
384 | USCRIPT_AFAKA = 147,/* Afak */ | |
385 | /** @stable ICU 4.8 */ | |
386 | USCRIPT_JURCHEN = 148,/* Jurc */ | |
387 | /** @stable ICU 4.8 */ | |
388 | USCRIPT_MRO = 149,/* Mroo */ | |
389 | /** @stable ICU 4.8 */ | |
390 | USCRIPT_NUSHU = 150,/* Nshu */ | |
391 | /** @stable ICU 4.8 */ | |
392 | USCRIPT_SHARADA = 151,/* Shrd */ | |
393 | /** @stable ICU 4.8 */ | |
394 | USCRIPT_SORA_SOMPENG = 152,/* Sora */ | |
395 | /** @stable ICU 4.8 */ | |
396 | USCRIPT_TAKRI = 153,/* Takr */ | |
397 | /** @stable ICU 4.8 */ | |
398 | USCRIPT_TANGUT = 154,/* Tang */ | |
399 | /** @stable ICU 4.8 */ | |
400 | USCRIPT_WOLEAI = 155,/* Wole */ | |
401 | ||
402 | /** @stable ICU 49 */ | |
403 | USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ | |
404 | /** @stable ICU 49 */ | |
405 | USCRIPT_KHOJKI = 157,/* Khoj */ | |
406 | /** @stable ICU 49 */ | |
407 | USCRIPT_TIRHUTA = 158,/* Tirh */ | |
408 | ||
409 | /** @stable ICU 52 */ | |
410 | USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ | |
411 | /** @stable ICU 52 */ | |
412 | USCRIPT_MAHAJANI = 160,/* Mahj */ | |
413 | ||
414 | /** @stable ICU 54 */ | |
415 | USCRIPT_AHOM = 161,/* Ahom */ | |
416 | /** @stable ICU 54 */ | |
417 | USCRIPT_HATRAN = 162,/* Hatr */ | |
418 | /** @stable ICU 54 */ | |
419 | USCRIPT_MODI = 163,/* Modi */ | |
420 | /** @stable ICU 54 */ | |
421 | USCRIPT_MULTANI = 164,/* Mult */ | |
422 | /** @stable ICU 54 */ | |
423 | USCRIPT_PAU_CIN_HAU = 165,/* Pauc */ | |
424 | /** @stable ICU 54 */ | |
425 | USCRIPT_SIDDHAM = 166,/* Sidd */ | |
426 | ||
427 | /** | |
428 | * One higher than the last script code constant. | |
429 | * This value increases as constants for script codes are added. | |
430 | * | |
431 | * There are constants for Unicode 7 script property values. | |
432 | * There are constants for ISO 15924 script codes assigned on or before 2013-10-12. | |
433 | * There are no constants for private use codes from Qaaa - Qabx | |
434 | * except as used in the UCD. | |
435 | * | |
436 | * @stable ICU 2.2 | |
437 | */ | |
438 | USCRIPT_CODE_LIMIT = 167 | |
439 | } UScriptCode; | |
440 | ||
441 | /** | |
442 | * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. | |
443 | * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym". | |
444 | * Fills in USCRIPT_LATIN given "en" OR "en_US" | |
445 | * If the required capacity is greater than the capacity of the destination buffer, | |
446 | * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned. | |
447 | * | |
448 | * <p>Note: To search by short or long script alias only, use | |
449 | * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does | |
450 | * a fast lookup with no access of the locale data. | |
451 | * | |
452 | * @param nameOrAbbrOrLocale name of the script, as given in | |
453 | * PropertyValueAliases.txt, or ISO 15924 code or locale | |
454 | * @param fillIn the UScriptCode buffer to fill in the script code | |
455 | * @param capacity the capacity (size) fo UScriptCode buffer passed in. | |
456 | * @param err the error status code. | |
457 | * @return The number of script codes filled in the buffer passed in | |
458 | * @stable ICU 2.4 | |
459 | */ | |
460 | U_STABLE int32_t U_EXPORT2 | |
461 | uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); | |
462 | ||
463 | /** | |
464 | * Returns the long Unicode script name, if there is one. | |
465 | * Otherwise returns the 4-letter ISO 15924 script code. | |
466 | * Returns "Malayam" given USCRIPT_MALAYALAM. | |
467 | * | |
468 | * @param scriptCode UScriptCode enum | |
469 | * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code, | |
470 | * or NULL if scriptCode is invalid | |
471 | * @stable ICU 2.4 | |
472 | */ | |
473 | U_STABLE const char* U_EXPORT2 | |
474 | uscript_getName(UScriptCode scriptCode); | |
475 | ||
476 | /** | |
477 | * Returns the 4-letter ISO 15924 script code, | |
478 | * which is the same as the short Unicode script name if Unicode has names for the script. | |
479 | * Returns "Mlym" given USCRIPT_MALAYALAM. | |
480 | * | |
481 | * @param scriptCode UScriptCode enum | |
482 | * @return short script name (4-letter code), or NULL if scriptCode is invalid | |
483 | * @stable ICU 2.4 | |
484 | */ | |
485 | U_STABLE const char* U_EXPORT2 | |
486 | uscript_getShortName(UScriptCode scriptCode); | |
487 | ||
488 | /** | |
489 | * Gets the script code associated with the given codepoint. | |
490 | * Returns USCRIPT_MALAYALAM given 0x0D02 | |
491 | * @param codepoint UChar32 codepoint | |
492 | * @param err the error status code. | |
493 | * @return The UScriptCode, or 0 if codepoint is invalid | |
494 | * @stable ICU 2.4 | |
495 | */ | |
496 | U_STABLE UScriptCode U_EXPORT2 | |
497 | uscript_getScript(UChar32 codepoint, UErrorCode *err); | |
498 | ||
499 | /** | |
500 | * Do the Script_Extensions of code point c contain script sc? | |
501 | * If c does not have explicit Script_Extensions, then this tests whether | |
502 | * c has the Script property value sc. | |
503 | * | |
504 | * Some characters are commonly used in multiple scripts. | |
505 | * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. | |
506 | * | |
507 | * The Script_Extensions property is provisional. It may be modified or removed | |
508 | * in future versions of the Unicode Standard, and thus in ICU. | |
509 | * @param c code point | |
510 | * @param sc script code | |
511 | * @return TRUE if sc is in Script_Extensions(c) | |
512 | * @stable ICU 49 | |
513 | */ | |
514 | U_STABLE UBool U_EXPORT2 | |
515 | uscript_hasScript(UChar32 c, UScriptCode sc); | |
516 | ||
517 | /** | |
518 | * Writes code point c's Script_Extensions as a list of UScriptCode values | |
519 | * to the output scripts array and returns the number of script codes. | |
520 | * - If c does have Script_Extensions, then the Script property value | |
521 | * (normally Common or Inherited) is not included. | |
522 | * - If c does not have Script_Extensions, then the one Script code is written to the output array. | |
523 | * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. | |
524 | * In other words, if the return value is 1, | |
525 | * then the output array contains exactly c's single Script code. | |
526 | * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. | |
527 | * | |
528 | * Some characters are commonly used in multiple scripts. | |
529 | * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. | |
530 | * | |
531 | * If there are more than capacity script codes to be written, then | |
532 | * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. | |
533 | * (Usual ICU buffer handling behavior.) | |
534 | * | |
535 | * The Script_Extensions property is provisional. It may be modified or removed | |
536 | * in future versions of the Unicode Standard, and thus in ICU. | |
537 | * @param c code point | |
538 | * @param scripts output script code array | |
539 | * @param capacity capacity of the scripts array | |
540 | * @param errorCode Standard ICU error code. Its input value must | |
541 | * pass the U_SUCCESS() test, or else the function returns | |
542 | * immediately. Check for U_FAILURE() on output or use with | |
543 | * function chaining. (See User Guide for details.) | |
544 | * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, | |
545 | * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity | |
546 | * @stable ICU 49 | |
547 | */ | |
548 | U_STABLE int32_t U_EXPORT2 | |
549 | uscript_getScriptExtensions(UChar32 c, | |
550 | UScriptCode *scripts, int32_t capacity, | |
551 | UErrorCode *errorCode); | |
552 | ||
553 | /** | |
554 | * Script usage constants. | |
555 | * See UAX #31 Unicode Identifier and Pattern Syntax. | |
556 | * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers | |
557 | * | |
558 | * @stable ICU 51 | |
559 | */ | |
560 | typedef enum UScriptUsage { | |
561 | /** Not encoded in Unicode. @stable ICU 51 */ | |
562 | USCRIPT_USAGE_NOT_ENCODED, | |
563 | /** Unknown script usage. @stable ICU 51 */ | |
564 | USCRIPT_USAGE_UNKNOWN, | |
565 | /** Candidate for Exclusion from Identifiers. @stable ICU 51 */ | |
566 | USCRIPT_USAGE_EXCLUDED, | |
567 | /** Limited Use script. @stable ICU 51 */ | |
568 | USCRIPT_USAGE_LIMITED_USE, | |
569 | /** Aspirational Use script. @stable ICU 51 */ | |
570 | USCRIPT_USAGE_ASPIRATIONAL, | |
571 | /** Recommended script. @stable ICU 51 */ | |
572 | USCRIPT_USAGE_RECOMMENDED | |
573 | } UScriptUsage; | |
574 | ||
575 | /** | |
576 | * Writes the script sample character string. | |
577 | * This string normally consists of one code point but might be longer. | |
578 | * The string is empty if the script is not encoded. | |
579 | * | |
580 | * @param script script code | |
581 | * @param dest output string array | |
582 | * @param capacity number of UChars in the dest array | |
583 | * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input | |
584 | * @return the string length, even if U_BUFFER_OVERFLOW_ERROR | |
585 | * @stable ICU 51 | |
586 | */ | |
587 | U_STABLE int32_t U_EXPORT2 | |
588 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); | |
589 | ||
590 | #if U_SHOW_CPLUSPLUS_API | |
591 | ||
592 | U_NAMESPACE_BEGIN | |
593 | class UnicodeString; | |
594 | U_NAMESPACE_END | |
595 | ||
596 | /** | |
597 | * Returns the script sample character string. | |
598 | * This string normally consists of one code point but might be longer. | |
599 | * The string is empty if the script is not encoded. | |
600 | * | |
601 | * @param script script code | |
602 | * @return the sample character string | |
603 | * @stable ICU 51 | |
604 | */ | |
605 | U_COMMON_API icu::UnicodeString U_EXPORT2 | |
606 | uscript_getSampleUnicodeString(UScriptCode script); | |
607 | ||
608 | #endif | |
609 | ||
610 | /** | |
611 | * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. | |
612 | * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. | |
613 | * | |
614 | * @param script script code | |
615 | * @return script usage | |
616 | * @see UScriptUsage | |
617 | * @stable ICU 51 | |
618 | */ | |
619 | U_STABLE UScriptUsage U_EXPORT2 | |
620 | uscript_getUsage(UScriptCode script); | |
621 | ||
622 | /** | |
623 | * Returns TRUE if the script is written right-to-left. | |
624 | * For example, Arab and Hebr. | |
625 | * | |
626 | * @param script script code | |
627 | * @return TRUE if the script is right-to-left | |
628 | * @stable ICU 51 | |
629 | */ | |
630 | U_STABLE UBool U_EXPORT2 | |
631 | uscript_isRightToLeft(UScriptCode script); | |
632 | ||
633 | /** | |
634 | * Returns TRUE if the script allows line breaks between letters (excluding hyphenation). | |
635 | * Such a script typically requires dictionary-based line breaking. | |
636 | * For example, Hani and Thai. | |
637 | * | |
638 | * @param script script code | |
639 | * @return TRUE if the script allows line breaks between letters | |
640 | * @stable ICU 51 | |
641 | */ | |
642 | U_STABLE UBool U_EXPORT2 | |
643 | uscript_breaksBetweenLetters(UScriptCode script); | |
644 | ||
645 | /** | |
646 | * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary. | |
647 | * For example, Latn and Cyrl. | |
648 | * | |
649 | * @param script script code | |
650 | * @return TRUE if the script is cased | |
651 | * @stable ICU 51 | |
652 | */ | |
653 | U_STABLE UBool U_EXPORT2 | |
654 | uscript_isCased(UScriptCode script); | |
655 | ||
656 | #endif |