X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/51004dcb01e06fef634b61be77ed73dd61cb6db9..2ca993e82fb37b597a3c73ecd1586a139a6579c5:/icuSources/common/unicode/uchar.h diff --git a/icuSources/common/unicode/uchar.h b/icuSources/common/unicode/uchar.h index e37b6a8e..61835840 100644 --- a/icuSources/common/unicode/uchar.h +++ b/icuSources/common/unicode/uchar.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1997-2013, International Business Machines +* Copyright (C) 1997-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * @@ -39,7 +39,7 @@ U_CDECL_BEGIN * @see u_getUnicodeVersion * @stable ICU 2.0 */ -#define U_UNICODE_VERSION "6.2" +#define U_UNICODE_VERSION "8.0" /** * \file @@ -397,8 +397,38 @@ typedef enum UProperty { UCHAR_CHANGES_WHEN_CASEMAPPED=55, /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, +#ifndef U_HIDE_DRAFT_API + /** + * Binary property Emoji. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @draft ICU 57 + */ + UCHAR_EMOJI=57, + /** + * Binary property Emoji_Presentation. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @draft ICU 57 + */ + UCHAR_EMOJI_PRESENTATION=58, + /** + * Binary property Emoji_Modifier. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @draft ICU 57 + */ + UCHAR_EMOJI_MODIFIER=59, + /** + * Binary property Emoji_Modifier_Base. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @draft ICU 57 + */ + UCHAR_EMOJI_MODIFIER_BASE=60, +#endif /* U_HIDE_DRAFT_API */ /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */ - UCHAR_BINARY_LIMIT=57, + UCHAR_BINARY_LIMIT=61, /** Enumerated property Bidi_Class. Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ @@ -480,8 +510,13 @@ typedef enum UProperty { (http://www.unicode.org/reports/tr29/) Returns UWordBreakValues values. @stable ICU 3.4 */ UCHAR_WORD_BREAK=0x1014, + /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). + Used in UAX #9: Unicode Bidirectional Algorithm + (http://www.unicode.org/reports/tr9/) + Returns UBidiPairedBracketType values. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ - UCHAR_INT_LIMIT=0x1015, + UCHAR_INT_LIMIT=0x1016, /** Bitmask property General_Category_Mask. This is the General_Category property returned as a bit mask. @@ -516,9 +551,11 @@ typedef enum UProperty { /** String property Case_Folding. Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ UCHAR_CASE_FOLDING=0x4002, +#ifndef U_HIDE_DEPRECATED_API /** Deprecated string property ISO_Comment. Corresponds to u_getISOComment. @deprecated ICU 49 */ UCHAR_ISO_COMMENT=0x4003, +#endif /* U_HIDE_DEPRECATED_API */ /** String property Lowercase_Mapping. Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ UCHAR_LOWERCASE_MAPPING=0x4004, @@ -540,19 +577,23 @@ typedef enum UProperty { /** String property Titlecase_Mapping. Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ UCHAR_TITLECASE_MAPPING=0x400A, +#ifndef U_HIDE_DEPRECATED_API /** String property Unicode_1_Name. This property is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this property. Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */ UCHAR_UNICODE_1_NAME=0x400B, +#endif /* U_HIDE_DEPRECATED_API */ /** String property Uppercase_Mapping. Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ UCHAR_UPPERCASE_MAPPING=0x400C, + /** String property Bidi_Paired_Bracket (new in Unicode 6.3). + Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET=0x400D, /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */ - UCHAR_STRING_LIMIT=0x400D, - /** Provisional property Script_Extensions (new in Unicode 6.0). - As a provisional property, it may be modified or removed - in future versions of the Unicode Standard, and thus in ICU. + UCHAR_STRING_LIMIT=0x400E, + + /** Miscellaneous property Script_Extensions (new in Unicode 6.0). Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/. Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. @@ -809,10 +850,41 @@ typedef enum UCharDirection { U_DIR_NON_SPACING_MARK = 17, /** BN @stable ICU 2.0 */ U_BOUNDARY_NEUTRAL = 18, + /** FSI @stable ICU 52 */ + U_FIRST_STRONG_ISOLATE = 19, + /** LRI @stable ICU 52 */ + U_LEFT_TO_RIGHT_ISOLATE = 20, + /** RLI @stable ICU 52 */ + U_RIGHT_TO_LEFT_ISOLATE = 21, + /** PDI @stable ICU 52 */ + U_POP_DIRECTIONAL_ISOLATE = 22, /** @stable ICU 2.0 */ U_CHAR_DIRECTION_COUNT } UCharDirection; +/** + * Bidi Paired Bracket Type constants. + * + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @stable ICU 52 + */ +typedef enum UBidiPairedBracketType { + /* + * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. + * It matches lines like + * U_BPT_ + */ + + /** Not a paired bracket. @stable ICU 52 */ + U_BPT_NONE, + /** Open paired bracket. @stable ICU 52 */ + U_BPT_OPEN, + /** Close paired bracket. @stable ICU 52 */ + U_BPT_CLOSE, + /** @stable ICU 52 */ + U_BPT_COUNT /* 3 */ +} UBidiPairedBracketType; + /** * Constants for Unicode blocks, see the Unicode Data file Blocks.txt * @stable ICU 2.0 @@ -1141,7 +1213,7 @@ enum UBlockCode { * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". * @stable ICU 2.2 */ - UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, + UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, /** @stable ICU 2.2 */ UBLOCK_TAGALOG = 98, /*[1700]*/ /** @stable ICU 2.2 */ @@ -1410,8 +1482,98 @@ enum UBlockCode { /** @stable ICU 49 */ UBLOCK_TAKRI = 220, /*[11680]*/ - /** @stable ICU 2.0 */ - UBLOCK_COUNT = 221, + /* New blocks in Unicode 7.0 */ + + /** @stable ICU 54 */ + UBLOCK_BASSA_VAH = 221, /*[16AD0]*/ + /** @stable ICU 54 */ + UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/ + /** @stable ICU 54 */ + UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/ + /** @stable ICU 54 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/ + /** @stable ICU 54 */ + UBLOCK_DUPLOYAN = 225, /*[1BC00]*/ + /** @stable ICU 54 */ + UBLOCK_ELBASAN = 226, /*[10500]*/ + /** @stable ICU 54 */ + UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/ + /** @stable ICU 54 */ + UBLOCK_GRANTHA = 228, /*[11300]*/ + /** @stable ICU 54 */ + UBLOCK_KHOJKI = 229, /*[11200]*/ + /** @stable ICU 54 */ + UBLOCK_KHUDAWADI = 230, /*[112B0]*/ + /** @stable ICU 54 */ + UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/ + /** @stable ICU 54 */ + UBLOCK_LINEAR_A = 232, /*[10600]*/ + /** @stable ICU 54 */ + UBLOCK_MAHAJANI = 233, /*[11150]*/ + /** @stable ICU 54 */ + UBLOCK_MANICHAEAN = 234, /*[10AC0]*/ + /** @stable ICU 54 */ + UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/ + /** @stable ICU 54 */ + UBLOCK_MODI = 236, /*[11600]*/ + /** @stable ICU 54 */ + UBLOCK_MRO = 237, /*[16A40]*/ + /** @stable ICU 54 */ + UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/ + /** @stable ICU 54 */ + UBLOCK_NABATAEAN = 239, /*[10880]*/ + /** @stable ICU 54 */ + UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/ + /** @stable ICU 54 */ + UBLOCK_OLD_PERMIC = 241, /*[10350]*/ + /** @stable ICU 54 */ + UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/ + /** @stable ICU 54 */ + UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/ + /** @stable ICU 54 */ + UBLOCK_PALMYRENE = 244, /*[10860]*/ + /** @stable ICU 54 */ + UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/ + /** @stable ICU 54 */ + UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/ + /** @stable ICU 54 */ + UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/ + /** @stable ICU 54 */ + UBLOCK_SIDDHAM = 248, /*[11580]*/ + /** @stable ICU 54 */ + UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/ + /** @stable ICU 54 */ + UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/ + /** @stable ICU 54 */ + UBLOCK_TIRHUTA = 251, /*[11480]*/ + /** @stable ICU 54 */ + UBLOCK_WARANG_CITI = 252, /*[118A0]*/ + + /* New blocks in Unicode 8.0 */ + + /** @stable ICU 56 */ + UBLOCK_AHOM = 253, /*[11700]*/ + /** @stable ICU 56 */ + UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/ + /** @stable ICU 56 */ + UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/ + /** @stable ICU 56 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/ + /** @stable ICU 56 */ + UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/ + /** @stable ICU 56 */ + UBLOCK_HATRAN = 258, /*[108E0]*/ + /** @stable ICU 56 */ + UBLOCK_MULTANI = 259, /*[11280]*/ + /** @stable ICU 56 */ + UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/ + /** @stable ICU 56 */ + UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/ + /** @stable ICU 56 */ + UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COUNT = 263, /** @stable ICU 2.0 */ UBLOCK_INVALID_CODE=-1 @@ -1457,7 +1619,7 @@ typedef enum UEastAsianWidth { typedef enum UCharNameChoice { /** Unicode character name (Name property). @stable ICU 2.0 */ U_UNICODE_CHAR_NAME, -#ifndef U_HIDE_DEPRECATED_API +#ifndef U_HIDE_DEPRECATED_API /** * The Unicode_1_Name property value which is of little practical value. * Beginning with ICU 49, ICU APIs return an empty string for this name choice. @@ -1620,6 +1782,34 @@ typedef enum UJoiningGroup { U_JG_FARSI_YEH, /**< @stable ICU 4.4 */ U_JG_NYA, /**< @stable ICU 4.4 */ U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */ + U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */ + U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */ U_JG_COUNT } UJoiningGroup; @@ -1680,7 +1870,10 @@ typedef enum UWordBreakValues { U_WB_MIDNUMLET =11, /*[MB]*/ U_WB_NEWLINE =12, /*[NL]*/ U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ - U_WB_COUNT = 14 + U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ + U_WB_SINGLE_QUOTE = 15, /*[SQ]*/ + U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/ + U_WB_COUNT = 17 } UWordBreakValues; /** @@ -2483,7 +2676,7 @@ u_isMirrored(UChar32 c); * as the mirror-image of the default glyph of the specified * character. This is useful for text conversion to and from * codepages with visual order, and for displays without glyph - * selecetion capabilities. + * selection capabilities. * * @param c the code point to be mapped * @return another Unicode code point that may serve as a mirror-image @@ -2497,6 +2690,25 @@ u_isMirrored(UChar32 c); U_STABLE UChar32 U_EXPORT2 u_charMirror(UChar32 c); +/** + * Maps the specified character to its paired bracket character. + * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). + * Otherwise c itself is returned. + * See http://www.unicode.org/reports/tr9/ + * + * @param c the code point to be mapped + * @return the paired bracket code point, + * or c itself if there is no such mapping + * (Bidi_Paired_Bracket_Type=None) + * + * @see UCHAR_BIDI_PAIRED_BRACKET + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @see u_charMirror + * @stable ICU 52 + */ +U_STABLE UChar32 U_EXPORT2 +u_getBidiPairedBracket(UChar32 c); + /** * Returns the general category value for the code point. * @@ -2657,7 +2869,7 @@ u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode); -#ifndef U_HIDE_DEPRECATED_API +#ifndef U_HIDE_DEPRECATED_API /** * Returns an empty string. * Used to return the ISO 10646 comment for a character. @@ -2676,7 +2888,7 @@ u_charName(UChar32 code, UCharNameChoice nameChoice, * * @deprecated ICU 49 */ -U_STABLE int32_t U_EXPORT2 +U_DEPRECATED int32_t U_EXPORT2 u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode);