git.saurik.com Git - apple/javascriptcore.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1997-2010, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	*
	7	* File USCRIPT.H
	8	*
	9	* Modification History:
	10	*
	11	* Date Name Description
	12	* 07/06/2001 Ram Creation.
	13	******************************************************************************
	14	*/
	15
	16	#ifndef USCRIPT_H
	17	#define USCRIPT_H
	18	#include "unicode/utypes.h"
	19
	20	/**
	21	* \file
	22	* \brief C API: Unicode Script Information
	23	*/
	24
	25	/**
	26	* Constants for ISO 15924 script codes.
	27	*
	28	* Many of these script codes - those from Unicode's ScriptNames.txt -
	29	* are character property values for Unicode's Script property.
	30	* See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
	31	*
	32	* Starting with ICU 3.6, constants for most ISO 15924 script codes
	33	* are included (currently excluding private-use codes Qaaa..Qabx).
	34	* For scripts for which there are codes in ISO 15924 but which are not
	35	* used in the Unicode Character Database (UCD), there are no Unicode characters
	36	* associated with those scripts.
	37	*
	38	* For example, there are no characters that have a UCD script code of
	39	* Hans or Hant. All Han ideographs have the Hani script code.
	40	* The Hans and Hant script codes are used with CLDR data.
	41	*
	42	* ISO 15924 script codes are included for use with CLDR and similar.
	43	*
	44	* @stable ICU 2.2
	45	*/
	46	typedef enum UScriptCode {
	47	USCRIPT_INVALID_CODE = -1,
	48	USCRIPT_COMMON = 0, /* Zyyy */
	49	USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
	50	USCRIPT_ARABIC = 2, /* Arab */
	51	USCRIPT_ARMENIAN = 3, /* Armn */
	52	USCRIPT_BENGALI = 4, /* Beng */
	53	USCRIPT_BOPOMOFO = 5, /* Bopo */
	54	USCRIPT_CHEROKEE = 6, /* Cher */
	55	USCRIPT_COPTIC = 7, /* Copt */
	56	USCRIPT_CYRILLIC = 8, /* Cyrl */
	57	USCRIPT_DESERET = 9, /* Dsrt */
	58	USCRIPT_DEVANAGARI = 10, /* Deva */
	59	USCRIPT_ETHIOPIC = 11, /* Ethi */
	60	USCRIPT_GEORGIAN = 12, /* Geor */
	61	USCRIPT_GOTHIC = 13, /* Goth */
	62	USCRIPT_GREEK = 14, /* Grek */
	63	USCRIPT_GUJARATI = 15, /* Gujr */
	64	USCRIPT_GURMUKHI = 16, /* Guru */
	65	USCRIPT_HAN = 17, /* Hani */
	66	USCRIPT_HANGUL = 18, /* Hang */
	67	USCRIPT_HEBREW = 19, /* Hebr */
	68	USCRIPT_HIRAGANA = 20, /* Hira */
	69	USCRIPT_KANNADA = 21, /* Knda */
	70	USCRIPT_KATAKANA = 22, /* Kana */
	71	USCRIPT_KHMER = 23, /* Khmr */
	72	USCRIPT_LAO = 24, /* Laoo */
	73	USCRIPT_LATIN = 25, /* Latn */
	74	USCRIPT_MALAYALAM = 26, /* Mlym */
	75	USCRIPT_MONGOLIAN = 27, /* Mong */
	76	USCRIPT_MYANMAR = 28, /* Mymr */
	77	USCRIPT_OGHAM = 29, /* Ogam */
	78	USCRIPT_OLD_ITALIC = 30, /* Ital */
	79	USCRIPT_ORIYA = 31, /* Orya */
	80	USCRIPT_RUNIC = 32, /* Runr */
	81	USCRIPT_SINHALA = 33, /* Sinh */
	82	USCRIPT_SYRIAC = 34, /* Syrc */
	83	USCRIPT_TAMIL = 35, /* Taml */
	84	USCRIPT_TELUGU = 36, /* Telu */
	85	USCRIPT_THAANA = 37, /* Thaa */
	86	USCRIPT_THAI = 38, /* Thai */
	87	USCRIPT_TIBETAN = 39, /* Tibt */
	88	/** Canadian_Aboriginal script. @stable ICU 2.6 */
	89	USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
	90	/** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
	91	USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
	92	USCRIPT_YI = 41, /* Yiii */
	93	USCRIPT_TAGALOG = 42, /* Tglg */
	94	USCRIPT_HANUNOO = 43, /* Hano */
	95	USCRIPT_BUHID = 44, /* Buhd */
	96	USCRIPT_TAGBANWA = 45, /* Tagb */
	97
	98	/* New scripts in Unicode 4 @stable ICU 2.6 */
	99	USCRIPT_BRAILLE = 46, /* Brai */
	100	USCRIPT_CYPRIOT = 47, /* Cprt */
	101	USCRIPT_LIMBU = 48, /* Limb */
	102	USCRIPT_LINEAR_B = 49, /* Linb */
	103	USCRIPT_OSMANYA = 50, /* Osma */
	104	USCRIPT_SHAVIAN = 51, /* Shaw */
	105	USCRIPT_TAI_LE = 52, /* Tale */
	106	USCRIPT_UGARITIC = 53, /* Ugar */
	107
	108	/** New script code in Unicode 4.0.1 @stable ICU 3.0 */
	109	USCRIPT_KATAKANA_OR_HIRAGANA = 54, /* Hrkt */
	110
	111	/* New scripts in Unicode 4.1 @stable ICU 3.4 */
	112	USCRIPT_BUGINESE = 55, /* Bugi */
	113	USCRIPT_GLAGOLITIC = 56, /* Glag */
	114	USCRIPT_KHAROSHTHI = 57, /* Khar */
	115	USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
	116	USCRIPT_NEW_TAI_LUE = 59, /* Talu */
	117	USCRIPT_TIFINAGH = 60, /* Tfng */
	118	USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
	119
	120	/* New script codes from ISO 15924 @stable ICU 3.6 */
	121	USCRIPT_BALINESE = 62, /* Bali */
	122	USCRIPT_BATAK = 63, /* Batk */
	123	USCRIPT_BLISSYMBOLS = 64, /* Blis */
	124	USCRIPT_BRAHMI = 65, /* Brah */
	125	USCRIPT_CHAM = 66, /* Cham */
	126	USCRIPT_CIRTH = 67, /* Cirt */
	127	USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
	128	USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
	129	USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
	130	USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
	131	USCRIPT_KHUTSURI = 72, /* Geok */
	132	USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
	133	USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
	134	USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
	135	USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
	136	USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
	137	USCRIPT_JAVANESE = 78, /* Java */
	138	USCRIPT_KAYAH_LI = 79, /* Kali */
	139	USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
	140	USCRIPT_LATIN_GAELIC = 81, /* Latg */
	141	USCRIPT_LEPCHA = 82, /* Lepc */
	142	USCRIPT_LINEAR_A = 83, /* Lina */
	143	/** @stable ICU 4.6 */
	144	USCRIPT_MANDAIC = 84, /* Mand */
	145	/** @stable ICU 3.6 */
	146	USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
	147	USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
	148	/** @stable ICU 4.6 */
	149	USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
	150	/** @stable ICU 3.6 */
	151	USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
	152	USCRIPT_NKO = 87, /* Nkoo */
	153	USCRIPT_ORKHON = 88, /* Orkh */
	154	USCRIPT_OLD_PERMIC = 89, /* Perm */
	155	USCRIPT_PHAGS_PA = 90, /* Phag */
	156	USCRIPT_PHOENICIAN = 91, /* Phnx */
	157	USCRIPT_PHONETIC_POLLARD = 92, /* Plrd */
	158	USCRIPT_RONGORONGO = 93, /* Roro */
	159	USCRIPT_SARATI = 94, /* Sara */
	160	USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
	161	USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
	162	USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
	163	USCRIPT_TENGWAR = 98, /* Teng */
	164	USCRIPT_VAI = 99, /* Vaii */
	165	USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
	166	USCRIPT_CUNEIFORM = 101,/* Xsux */
	167	USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
	168	USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
	169
	170	/* New script codes from ISO 15924 @stable ICU 3.8 */
	171	USCRIPT_CARIAN = 104,/* Cari */
	172	USCRIPT_JAPANESE = 105,/* Jpan */
	173	USCRIPT_LANNA = 106,/* Lana */
	174	USCRIPT_LYCIAN = 107,/* Lyci */
	175	USCRIPT_LYDIAN = 108,/* Lydi */
	176	USCRIPT_OL_CHIKI = 109,/* Olck */
	177	USCRIPT_REJANG = 110,/* Rjng */
	178	USCRIPT_SAURASHTRA = 111,/* Saur */
	179	USCRIPT_SIGN_WRITING = 112,/* Sgnw */
	180	USCRIPT_SUNDANESE = 113,/* Sund */
	181	USCRIPT_MOON = 114,/* Moon */
	182	USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
	183
	184	/* New script codes from ISO 15924 @stable ICU 4.0 */
	185	USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
	186	USCRIPT_AVESTAN = 117,/* Avst */
	187	USCRIPT_CHAKMA = 118,/* Cakm */
	188	USCRIPT_KOREAN = 119,/* Kore */
	189	USCRIPT_KAITHI = 120,/* Kthi */
	190	USCRIPT_MANICHAEAN = 121,/* Mani */
	191	USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
	192	USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
	193	USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
	194	USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
	195	USCRIPT_SAMARITAN = 126,/* Samr */
	196	USCRIPT_TAI_VIET = 127,/* Tavt */
	197	USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
	198	USCRIPT_SYMBOLS = 129,/* Zsym */
	199
	200	/* New script codes from ISO 15924 @stable ICU 4.4 */
	201	USCRIPT_BAMUM = 130,/* Bamu */
	202	USCRIPT_LISU = 131,/* Lisu */
	203	USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
	204	USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
	205
	206	/* New script codes from ISO 15924 @stable ICU 4.6 */
	207	USCRIPT_BASSA_VAH = 134,/* Bass */
	208	USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl; note: the constant name is spelled "SHORTAND" and must be used with that spelling */
	209	USCRIPT_ELBASAN = 136,/* Elba */
	210	USCRIPT_GRANTHA = 137,/* Gran */
	211	USCRIPT_KPELLE = 138,/* Kpel */
	212	USCRIPT_LOMA = 139,/* Loma */
	213	USCRIPT_MENDE = 140,/* Mend */
	214	USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
	215	USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
	216	USCRIPT_NABATAEAN = 143,/* Nbat */
	217	USCRIPT_PALMYRENE = 144,/* Palm */
	218	USCRIPT_SINDHI = 145,/* Sind */
	219	USCRIPT_WARANG_CITI = 146,/* Wara */
	220
	221	/* Private use codes from Qaaa - Qabx are not supported */
	222	USCRIPT_CODE_LIMIT = 147
	223	} UScriptCode;
	224
	225	/**
	226	* Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
	227	* Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
	228	* Fills in USCRIPT_LATIN given "en" OR "en_US".
	229	* If the required capacity is greater than the capacity of the destination buffer, then the error code
	230	* is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
	231	*
	232	* <p>Note: To search by short or long script alias only, use
	233	* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does
	234	* a fast lookup with no access of the locale data.
	235	* @param nameOrAbbrOrLocale name of the script, as given in
	236	* PropertyValueAliases.txt, or ISO 15924 code or locale
	237	* @param fillIn the UScriptCode buffer to fill in the script code
	238	* @param capacity the capacity (size) of the UScriptCode buffer passed in.
	239	* @param err the error status code.
	240	* @return The number of script codes filled in the buffer passed in
	241	* @stable ICU 2.4
	242	*/
	243	U_STABLE int32_t U_EXPORT2
	244	uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
	245
	246	/**
	247	* Gets the long script name associated with the given script code.
	248	* Returns "Malayalam" given USCRIPT_MALAYALAM
	249	* @param scriptCode UScriptCode enum
	250	* @return script long name as given in
	251	* PropertyValueAliases.txt, or NULL if scriptCode is invalid
	252	* @stable ICU 2.4
	253	*/
	254	U_STABLE const char* U_EXPORT2
	255	uscript_getName(UScriptCode scriptCode);
	256
	257	/**
	258	* Gets the abbreviated (short) script name associated with the given script code.
	259	* Returns "Mlym" given USCRIPT_MALAYALAM
	260	* @param scriptCode UScriptCode enum
	261	* @return script abbreviated name as given in
	262	* PropertyValueAliases.txt, or NULL if scriptCode is invalid
	263	* @stable ICU 2.4
	264	*/
	265	U_STABLE const char* U_EXPORT2
	266	uscript_getShortName(UScriptCode scriptCode);
	267
	268	/**
	269	* Gets the script code associated with the given codepoint.
	270	* Returns USCRIPT_MALAYALAM given 0x0D02
	271	* @param codepoint UChar32 codepoint
	272	* @param err the error status code.
	273	* @return The UScriptCode, or 0 (the value of USCRIPT_COMMON) if codepoint is invalid
	274	* @stable ICU 2.4
	275	*/
	276	U_STABLE UScriptCode U_EXPORT2
	277	uscript_getScript(UChar32 codepoint, UErrorCode *err);
	278
	279	/**
	280	* Is code point c used in script sc?
	281	* That is, does code point c have the Script property value sc,
	282	* or do code point c's Script_Extensions include script code sc?
	283	*
	284	* Some characters are commonly used in multiple scripts.
	285	* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
	286	*
	287	* The Script_Extensions property is provisional. It may be modified or removed
	288	* in future versions of the Unicode Standard, and thus in ICU.
	289	* @param c code point
	290	* @param sc script code
	291	* @return TRUE if Script(c)==sc or sc is in Script_Extensions(c), FALSE otherwise
	292	* @draft ICU 4.6
	293	*/
	294	U_DRAFT UBool U_EXPORT2
	295	uscript_hasScript(UChar32 c, UScriptCode sc);
	296
	297	/**
	298	* Writes code point c's Script_Extensions as a list of UScriptCode values
	299	* to the output scripts array.
	300	*
	301	* Some characters are commonly used in multiple scripts.
	302	* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
	303	*
	304	* If there are more than capacity script codes to be written, then
	305	* U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
	306	* (Usual ICU buffer handling behavior.)
	307	*
	308	* The Script_Extensions property is provisional. It may be modified or removed
	309	* in future versions of the Unicode Standard, and thus in ICU.
	310	* @param c code point
	311	* @param scripts output script code array
	312	* @param capacity capacity of the scripts array
	313	* @param pErrorCode Standard ICU error code. Its input value must
	314	* pass the U_SUCCESS() test, or else the function returns
	315	* immediately. Check for U_FAILURE() on output or use with
	316	* function chaining. (See User Guide for details.)
	317	* @return number of script codes in c's Script_Extensions,
	318	* written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
	319	* @draft ICU 4.6
	320	*/
	321	U_DRAFT int32_t U_EXPORT2
	322	uscript_getScriptExtensions(UChar32 c,
	323	UScriptCode *scripts, int32_t capacity,
	324	UErrorCode *pErrorCode);
	325
	326	#endif