+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2002-2010, International Business Machines
+* Copyright (C) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uprops.h
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
/* constants for the storage form of numeric types and values */
enum {
+ /** No numeric value. */
UPROPS_NTV_NONE=0,
+ /** Decimal digits: nv=0..9 */
UPROPS_NTV_DECIMAL_START=1,
+ /** Other digits: nv=0..9 */
UPROPS_NTV_DIGIT_START=11,
+ /** Small integers: nv=0..154 */
UPROPS_NTV_NUMERIC_START=21,
+ /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
UPROPS_NTV_FRACTION_START=0xb0,
+ /**
+ * Large integers:
+ * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
+ * (only one significant decimal digit)
+ */
UPROPS_NTV_LARGE_START=0x1e0,
- UPROPS_NTV_RESERVED_START=0x300,
+ /**
+ * Sexagesimal numbers:
+ * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
+ */
+ UPROPS_NTV_BASE60_START=0x300,
+ /**
+ * Fraction-20 values:
+ * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
+ * numerator: num = 2*(frac20&3)+1
+ * denominator: den = 20<<(frac20>>2)
+ */
+ UPROPS_NTV_FRACTION20_START=UPROPS_NTV_BASE60_START+36, // 0x300+9*4=0x324
+ /**
+ * Fraction-32 values:
+ * frac32 = ntv-0x33c = 0..15 -> 1|3|5|7 / 32|64|128|256
+ * numerator: num = 2*(frac32&3)+1
+ * denominator: den = 32<<(frac32>>2)
+ */
+ UPROPS_NTV_FRACTION32_START=UPROPS_NTV_FRACTION20_START+24, // 0x324+6*4=0x33c (24 decimal == 0x18)
+ /** No numeric value (yet). */
+ UPROPS_NTV_RESERVED_START=UPROPS_NTV_FRACTION32_START+16, // 0x33c+4*4=0x34c (16 decimal == 0x10)
UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1
};
UPROPS_VARIATION_SELECTOR,
UPROPS_PATTERN_SYNTAX, /* new in ICU 3.4 and Unicode 4.1 */
UPROPS_PATTERN_WHITE_SPACE,
- UPROPS_RESERVED, /* reserved & unused */
+ UPROPS_PREPENDED_CONCATENATION_MARK, // new in ICU 60 and Unicode 10
UPROPS_BINARY_1_TOP /* ==32 - full! */
};
/*
* Properties in vector word 2
* Bits
- * 31..26 reserved
+ * 31..26 Emoji properties, see http://www.unicode.org/reports/tr51/#Emoji_Properties
* 25..20 Line Break
* 19..15 Sentence Break
* 14..10 Word Break
* 9.. 5 Grapheme Cluster Break
* 4.. 0 Decomposition Type
*/
+enum { // Values 26..31: one per bit of the "31..26" slot in the vector word 2 layout comment above.
+ UPROPS_2_EXTENDED_PICTOGRAPHIC=26, // 26, lowest bit of the slot
+ UPROPS_2_EMOJI_COMPONENT, // 27
+ UPROPS_2_EMOJI, // 28
+ UPROPS_2_EMOJI_PRESENTATION, // 29
+ UPROPS_2_EMOJI_MODIFIER, // 30
+ UPROPS_2_EMOJI_MODIFIER_BASE // 31, highest bit of the 32-bit word
+};
+
#define UPROPS_LB_MASK 0x03f00000
#define UPROPS_LB_SHIFT 20
#define UPROPS_DT_MASK 0x0000001f
+/**
+ * Gets the main properties value for a code point.
+ * Implemented in uchar.c for uprops.cpp.
+ */
+U_CFUNC uint32_t
+u_getMainProperties(UChar32 c);
+
/**
* Get a properties vector word for a code point.
- * Implemented in uchar.c for uprops.c.
- * column==-1 gets the 32-bit main properties word instead.
+ * Implemented in uchar.c for uprops.cpp.
* @return 0 if no data or illegal argument
*/
U_CFUNC uint32_t
UPROPS_SRC_NFKC_CF,
/** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
UPROPS_SRC_NFC_CANON_ITER,
+ // Text layout properties.
+ UPROPS_SRC_INPC,
+ UPROPS_SRC_INSC,
+ UPROPS_SRC_VO,
/** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
UPROPS_SRC_COUNT
};
U_CFUNC void U_EXPORT2
upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
+U_CFUNC void U_EXPORT2
+uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode);
+
/**
* Return a set of characters for property enumeration.
* For each two consecutive characters (start, limit) in the set,
uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
*/
-/**
- * Swap the ICU Unicode properties file. See uchar.c.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-uprops_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
/**
* Swap the ICU Unicode character names file. See uchar.c.
* @internal
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
-#ifdef XP_CPLUSPLUS
+#ifdef __cplusplus
U_NAMESPACE_BEGIN
class UnicodeSet;
+class CharacterProperties { // Static-only holder: exposes a single static member function and no instance state.
+public:
+ CharacterProperties() = delete; // deleted default constructor: the class cannot be default-constructed
+ static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode); // NOTE(review): presumably returns the inclusions set used when enumerating `prop`; implementation and ownership/lifetime of the returned pointer are not visible here - confirm before relying on them
+};
+
// implemented in uniset_props.cpp
U_CFUNC UnicodeSet *
uniset_getUnicode32Instance(UErrorCode &errorCode);