#include "unicode/utypes.h"
#include "unicode/stringoptions.h"
+#include "unicode/ucpmap.h"
+
+#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
+
+#define USET_DEFINED
+
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * It is forward-declared here to avoid including unicode/uset.h file if related
+ * APIs are not used.
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+
+#endif
+
U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
-#define U_UNICODE_VERSION "11.0"
+#define U_UNICODE_VERSION "12.1"
/**
* \file
* "About the Unicode Character Database" (http://www.unicode.org/ucd/)
* and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
*
+ * Many properties are accessible via generic functions that take a UProperty selector.
+ * - u_hasBinaryProperty() returns a binary value (TRUE/FALSE) per property and code point.
+ * - u_getIntPropertyValue() returns an integer value per property and code point.
+ * For each supported enumerated or catalog property, there is
+ * an enum type for all of the property's values, and
+ * u_getIntPropertyValue() returns the numeric values of those constants.
+ * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
+ * all code points for which the property is true.
+ * - u_getIntPropertyMap() returns a map for each
+ * ICU-supported enumerated/catalog/int-valued property which
+ * maps all Unicode code points to their values for that property.
+ *
* Many functions are designed to match java.lang.Character functions.
* See the individual function documentation,
* and see the JDK 1.4 java.lang.Character documentation
(http://www.unicode.org/reports/tr9/)
Returns UBidiPairedBracketType values. @stable ICU 52 */
UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
+ /**
+ * Enumerated property Indic_Positional_Category.
+ * New in Unicode 6.0 as provisional property Indic_Matra_Category;
+ * renamed and changed to informative in Unicode 8.0.
+ * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
+ * @stable ICU 63
+ */
+ UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
+ /**
+ * Enumerated property Indic_Syllabic_Category.
+ * New in Unicode 6.0 as provisional; informative since Unicode 8.0.
+ * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
+ * @stable ICU 63
+ */
+ UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
+ /**
+ * Enumerated property Vertical_Orientation.
+ * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
+ * New as a UCD property in Unicode 10.0.
+ * @stable ICU 63
+ */
+ UCHAR_VERTICAL_ORIENTATION=0x1018,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for enumerated/integer Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UCHAR_INT_LIMIT=0x1016,
+ UCHAR_INT_LIMIT=0x1019,
#endif // U_HIDE_DEPRECATED_API
/** Bitmask property General_Category_Mask.
/** @stable ICU 62 */
UBLOCK_SOGDIAN = 291, /*[10F30]*/
+ // New blocks in Unicode 12.0
+
+ /** @stable ICU 64 */
+ UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292, /*[13430]*/
+ /** @stable ICU 64 */
+ UBLOCK_ELYMAIC = 293, /*[10FE0]*/
+ /** @stable ICU 64 */
+ UBLOCK_NANDINAGARI = 294, /*[119A0]*/
+ /** @stable ICU 64 */
+ UBLOCK_NYIAKENG_PUACHUE_HMONG = 295, /*[1E100]*/
+ /** @stable ICU 64 */
+ UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, /*[1ED00]*/
+ /** @stable ICU 64 */
+ UBLOCK_SMALL_KANA_EXTENSION = 297, /*[1B130]*/
+ /** @stable ICU 64 */
+ UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298, /*[1FA70]*/
+ /** @stable ICU 64 */
+ UBLOCK_TAMIL_SUPPLEMENT = 299, /*[11FC0]*/
+ /** @stable ICU 64 */
+ UBLOCK_WANCHO = 300, /*[1E2C0]*/
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UBlockCode value.
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UBLOCK_COUNT = 292,
+ UBLOCK_COUNT = 301,
#endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */
#endif // U_HIDE_DEPRECATED_API
} UHangulSyllableType;
+/**
+ * Indic Positional Category constants.
+ *
+ * @see UCHAR_INDIC_POSITIONAL_CATEGORY
+ * @stable ICU 63
+ */
+typedef enum UIndicPositionalCategory {
+ /*
+ * Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_INPC_<Unicode Indic_Positional_Category value name>
+ */
+
+ /** @stable ICU 63 */
+ U_INPC_NA,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM_AND_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_LEFT_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_OVERSTRUCK,
+ /** @stable ICU 63 */
+ U_INPC_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_BOTTOM,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_LEFT_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_VISUAL_ORDER_LEFT,
+} UIndicPositionalCategory;
+
+/**
+ * Indic Syllabic Category constants.
+ *
+ * @see UCHAR_INDIC_SYLLABIC_CATEGORY
+ * @stable ICU 63
+ */
+typedef enum UIndicSyllabicCategory {
+ /*
+ * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_INSC_<Unicode Indic_Syllabic_Category value name>
+ */
+
+ /** @stable ICU 63 */
+ U_INSC_OTHER,
+ /** @stable ICU 63 */
+ U_INSC_AVAGRAHA,
+ /** @stable ICU 63 */
+ U_INSC_BINDU,
+ /** @stable ICU 63 */
+ U_INSC_BRAHMI_JOINING_NUMBER,
+ /** @stable ICU 63 */
+ U_INSC_CANTILLATION_MARK,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_DEAD,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_FINAL,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_HEAD_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_INITIAL_POSTFIXED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_KILLER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_MEDIAL,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PLACEHOLDER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PRECEDING_REPHA,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PREFIXED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_SUBJOINED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_SUCCEEDING_REPHA,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_WITH_STACKER,
+ /** @stable ICU 63 */
+ U_INSC_GEMINATION_MARK,
+ /** @stable ICU 63 */
+ U_INSC_INVISIBLE_STACKER,
+ /** @stable ICU 63 */
+ U_INSC_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_MODIFYING_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_NON_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_NUKTA,
+ /** @stable ICU 63 */
+ U_INSC_NUMBER,
+ /** @stable ICU 63 */
+ U_INSC_NUMBER_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_PURE_KILLER,
+ /** @stable ICU 63 */
+ U_INSC_REGISTER_SHIFTER,
+ /** @stable ICU 63 */
+ U_INSC_SYLLABLE_MODIFIER,
+ /** @stable ICU 63 */
+ U_INSC_TONE_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_TONE_MARK,
+ /** @stable ICU 63 */
+ U_INSC_VIRAMA,
+ /** @stable ICU 63 */
+ U_INSC_VISARGA,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL_DEPENDENT,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL_INDEPENDENT,
+} UIndicSyllabicCategory;
+
+/**
+ * Vertical Orientation constants.
+ *
+ * @see UCHAR_VERTICAL_ORIENTATION
+ * @stable ICU 63
+ */
+typedef enum UVerticalOrientation {
+ /*
+ * Note: UVerticalOrientation constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_VO_<Unicode Vertical_Orientation value name>
+ */
+
+ /** @stable ICU 63 */
+ U_VO_ROTATED,
+ /** @stable ICU 63 */
+ U_VO_TRANSFORMED_ROTATED,
+ /** @stable ICU 63 */
+ U_VO_TRANSFORMED_UPRIGHT,
+ /** @stable ICU 63 */
+ U_VO_UPRIGHT,
+} UVerticalOrientation;
+
/**
* Check a binary Unicode property for a code point.
*
* does not have data for the property at all, or not for this code point.
*
* @see UProperty
+ * @see u_getBinaryPropertySet
* @see u_getIntPropertyValue
* @see u_getUnicodeVersion
* @stable ICU 2.1
U_STABLE UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which);
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Returns a frozen USet for a binary property.
+ * The library retains ownership over the returned object.
+ * Sets an error code if the property number is not one for a binary property.
+ *
+ * The returned set contains all code points for which the property is true.
+ *
+ * @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the property as a set
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see Unicode::fromUSet
+ * @draft ICU 63
+ */
+U_CAPI const USet * U_EXPORT2
+u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);
+
+#endif // U_HIDE_DRAFT_API
+
/**
* Check if a code point has the Alphabetic Unicode property.
* Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
* @see u_hasBinaryProperty
* @see u_getIntPropertyMinValue
* @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyMap
* @see u_getUnicodeVersion
* @stable ICU 2.2
*/
U_STABLE int32_t U_EXPORT2
u_getIntPropertyMaxValue(UProperty which);
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
+ * The library retains ownership over the returned object.
+ * Sets an error code if the property number is not one for an "int property".
+ *
+ * The returned object maps all Unicode code points to their values for that property.
+ * For documentation of the integer values see u_getIntPropertyValue().
+ *
+ * @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the property as a map
+ * @see UProperty
+ * @see u_getIntPropertyValue
+ * @draft ICU 63
+ */
+U_CAPI const UCPMap * U_EXPORT2
+u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);
+
+#endif // U_HIDE_DRAFT_API
+
/**
* Get the numeric value for a Unicode code point as defined in the
* Unicode Character Database.
* True for general categories "L" (letters), "N" (numbers),
* "Mc" (spacing combining marks), and "Me" (enclosing marks).
*
- * Note that this is different from the Unicode definition in
- * chapter 3.5, conformance clause D13,
+ * Note that this is different from the definition in the Unicode
+ * Standard section 3.6, conformance clause D51,
* which defines base characters to be all characters (not Cn)
* that do not graphically combine with preceding characters (M)
* and that are neither control (Cc) or format (Cf) characters.