/*
**********************************************************************
-* Copyright (C) 1997-2013, International Business Machines
+* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
-#define U_UNICODE_VERSION "6.2"
+#define U_UNICODE_VERSION "8.0"
/**
* \file
UCHAR_CHANGES_WHEN_CASEMAPPED=55,
/** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Binary property Emoji.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @draft ICU 57
+ */
+ UCHAR_EMOJI=57,
+ /**
+ * Binary property Emoji_Presentation.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @draft ICU 57
+ */
+ UCHAR_EMOJI_PRESENTATION=58,
+ /**
+ * Binary property Emoji_Modifier.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @draft ICU 57
+ */
+ UCHAR_EMOJI_MODIFIER=59,
+ /**
+ * Binary property Emoji_Modifier_Base.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @draft ICU 57
+ */
+ UCHAR_EMOJI_MODIFIER_BASE=60,
+#endif /* U_HIDE_DRAFT_API */
/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
- UCHAR_BINARY_LIMIT=57,
+ UCHAR_BINARY_LIMIT=61,
/** Enumerated property Bidi_Class.
Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
(http://www.unicode.org/reports/tr29/)
Returns UWordBreakValues values. @stable ICU 3.4 */
UCHAR_WORD_BREAK=0x1014,
+ /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+ Used in UAX #9: Unicode Bidirectional Algorithm
+ (http://www.unicode.org/reports/tr9/)
+ Returns UBidiPairedBracketType values. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
/** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
- UCHAR_INT_LIMIT=0x1015,
+ UCHAR_INT_LIMIT=0x1016,
/** Bitmask property General_Category_Mask.
This is the General_Category property returned as a bit mask.
/** String property Case_Folding.
Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
UCHAR_CASE_FOLDING=0x4002,
+#ifndef U_HIDE_DEPRECATED_API
/** Deprecated string property ISO_Comment.
Corresponds to u_getISOComment. @deprecated ICU 49 */
UCHAR_ISO_COMMENT=0x4003,
+#endif /* U_HIDE_DEPRECATED_API */
/** String property Lowercase_Mapping.
Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
UCHAR_LOWERCASE_MAPPING=0x4004,
/** String property Titlecase_Mapping.
Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
UCHAR_TITLECASE_MAPPING=0x400A,
+#ifndef U_HIDE_DEPRECATED_API
/** String property Unicode_1_Name.
This property is of little practical value.
Beginning with ICU 49, ICU APIs return an empty string for this property.
Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
UCHAR_UNICODE_1_NAME=0x400B,
+#endif /* U_HIDE_DEPRECATED_API */
/** String property Uppercase_Mapping.
Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
UCHAR_UPPERCASE_MAPPING=0x400C,
+ /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
+ Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET=0x400D,
/** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
- UCHAR_STRING_LIMIT=0x400D,
- /** Provisional property Script_Extensions (new in Unicode 6.0).
- As a provisional property, it may be modified or removed
- in future versions of the Unicode Standard, and thus in ICU.
+ UCHAR_STRING_LIMIT=0x400E,
+
+ /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
Some characters are commonly used in multiple scripts.
For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
U_DIR_NON_SPACING_MARK = 17,
/** BN @stable ICU 2.0 */
U_BOUNDARY_NEUTRAL = 18,
+ /** FSI @stable ICU 52 */
+ U_FIRST_STRONG_ISOLATE = 19,
+ /** LRI @stable ICU 52 */
+ U_LEFT_TO_RIGHT_ISOLATE = 20,
+ /** RLI @stable ICU 52 */
+ U_RIGHT_TO_LEFT_ISOLATE = 21,
+ /** PDI @stable ICU 52 */
+ U_POP_DIRECTIONAL_ISOLATE = 22,
/** @stable ICU 2.0 */
U_CHAR_DIRECTION_COUNT
} UCharDirection;
+/**
+ * Bidi Paired Bracket Type constants.
+ * These are the values of the enumerated property
+ * UCHAR_BIDI_PAIRED_BRACKET_TYPE (Bidi_Paired_Bracket_Type, new in Unicode 6.3).
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @stable ICU 52
+ */
+typedef enum UBidiPairedBracketType {
+ /*
+ * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
+ * so keep one constant per line and keep the U_BPT_ prefix.
+ */
+
+ /** Not a paired bracket. @stable ICU 52 */
+ U_BPT_NONE,
+ /** Open paired bracket. @stable ICU 52 */
+ U_BPT_OPEN,
+ /** Close paired bracket. @stable ICU 52 */
+ U_BPT_CLOSE,
+ /** Number of UBidiPairedBracketType constants (one more than the highest value). @stable ICU 52 */
+ U_BPT_COUNT /* 3 */
+} UBidiPairedBracketType;
+
/**
* Constants for Unicode blocks, see the Unicode Data file Blocks.txt
* @stable ICU 2.0
* Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
* @stable ICU 2.2
*/
- UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
/** @stable ICU 2.2 */
UBLOCK_TAGALOG = 98, /*[1700]*/
/** @stable ICU 2.2 */
/** @stable ICU 49 */
UBLOCK_TAKRI = 220, /*[11680]*/
- /** @stable ICU 2.0 */
- UBLOCK_COUNT = 221,
+ /* New blocks in Unicode 7.0 */
+
+ /** @stable ICU 54 */
+ UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
+ /** @stable ICU 54 */
+ UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
+ /** @stable ICU 54 */
+ UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/
+ /** @stable ICU 54 */
+ UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
+ /** @stable ICU 54 */
+ UBLOCK_ELBASAN = 226, /*[10500]*/
+ /** @stable ICU 54 */
+ UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/
+ /** @stable ICU 54 */
+ UBLOCK_GRANTHA = 228, /*[11300]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHOJKI = 229, /*[11200]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHUDAWADI = 230, /*[112B0]*/
+ /** @stable ICU 54 */
+ UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
+ /** @stable ICU 54 */
+ UBLOCK_LINEAR_A = 232, /*[10600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MAHAJANI = 233, /*[11150]*/
+ /** @stable ICU 54 */
+ UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
+ /** @stable ICU 54 */
+ UBLOCK_MODI = 236, /*[11600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MRO = 237, /*[16A40]*/
+ /** @stable ICU 54 */
+ UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_NABATAEAN = 239, /*[10880]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_PERMIC = 241, /*[10350]*/
+ /** @stable ICU 54 */
+ UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
+ /** @stable ICU 54 */
+ UBLOCK_PALMYRENE = 244, /*[10860]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
+ /** @stable ICU 54 */
+ UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SIDDHAM = 248, /*[11580]*/
+ /** @stable ICU 54 */
+ UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
+ /** @stable ICU 54 */
+ UBLOCK_TIRHUTA = 251, /*[11480]*/
+ /** @stable ICU 54 */
+ UBLOCK_WARANG_CITI = 252, /*[118A0]*/
+
+ /* New blocks in Unicode 8.0 */
+
+ /** @stable ICU 56 */
+ UBLOCK_AHOM = 253, /*[11700]*/
+ /** @stable ICU 56 */
+ UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/
+ /** @stable ICU 56 */
+ UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/
+ /** @stable ICU 56 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/
+ /** @stable ICU 56 */
+ UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/
+ /** @stable ICU 56 */
+ UBLOCK_HATRAN = 258, /*[108E0]*/
+ /** @stable ICU 56 */
+ UBLOCK_MULTANI = 259, /*[11280]*/
+ /** @stable ICU 56 */
+ UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/
+ /** @stable ICU 56 */
+ UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/
+ /** @stable ICU 56 */
+ UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COUNT = 263,
/** @stable ICU 2.0 */
UBLOCK_INVALID_CODE=-1
typedef enum UCharNameChoice {
/** Unicode character name (Name property). @stable ICU 2.0 */
U_UNICODE_CHAR_NAME,
-#ifndef U_HIDE_DEPRECATED_API
+#ifndef U_HIDE_DEPRECATED_API
/**
* The Unicode_1_Name property value which is of little practical value.
* Beginning with ICU 49, ICU APIs return an empty string for this name choice.
U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
U_JG_NYA, /**< @stable ICU 4.4 */
U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */
+ U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */
+ U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */
U_JG_COUNT
} UJoiningGroup;
U_WB_MIDNUMLET =11, /*[MB]*/
U_WB_NEWLINE =12, /*[NL]*/
U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
- U_WB_COUNT = 14
+ U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
+ U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
+ U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
+ U_WB_COUNT = 17
} UWordBreakValues;
/**
* as the mirror-image of the default glyph of the specified
* character. This is useful for text conversion to and from
* codepages with visual order, and for displays without glyph
- * selecetion capabilities.
+ * selection capabilities.
*
* @param c the code point to be mapped
* @return another Unicode code point that may serve as a mirror-image
U_STABLE UChar32 U_EXPORT2
u_charMirror(UChar32 c);
+/**
+ * Maps the specified character to its paired bracket character
+ * (the Bidi_Paired_Bracket string property, new in Unicode 6.3).
+ * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
+ * Otherwise c itself is returned.
+ * See UAX #9: Unicode Bidirectional Algorithm
+ * (http://www.unicode.org/reports/tr9/).
+ *
+ * @param c the code point to be mapped
+ * @return the paired bracket code point,
+ * or c itself if there is no such mapping
+ * (Bidi_Paired_Bracket_Type=None)
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @see u_charMirror
+ * @stable ICU 52
+ */
+U_STABLE UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c);
+
/**
* Returns the general category value for the code point.
*
char *buffer, int32_t bufferLength,
UErrorCode *pErrorCode);
-#ifndef U_HIDE_DEPRECATED_API
+#ifndef U_HIDE_DEPRECATED_API
/**
* Returns an empty string.
* Used to return the ISO 10646 comment for a character.
*
* @deprecated ICU 49
*/
-U_STABLE int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
u_getISOComment(UChar32 c,
char *dest, int32_t destCapacity,
UErrorCode *pErrorCode);