2 ********************************************************************************
3 * Copyright (C) 1996-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ********************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/02/97 aliu Creation.
13 * 4/15/99 Madhu Updated all the function definitions for C Implementation
14 * 5/20/99 Madhu Added the function u_getVersion()
15 * 8/19/1999 srl Upgraded scripts to Unicode3.0
16 * 11/11/1999 weiv added u_isalnum(), cleaned comments
17 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion.
18 * 06/20/2000 helena OS/400 port changes; mostly typecast.
19 ******************************************************************************
22 #include "unicode/utypes.h"
23 #include "unicode/uchar.h"
24 #include "unicode/uscript.h"
25 #include "unicode/udata.h"
31 #include "unormimp.h" /* JAMO_L_BASE etc. */
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast stays attached to
 * its operand at every use site; without the outer parentheses
 * "sizeof LENGTHOF(a)" would parse as sizeof(int32_t) followed by a call.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
36 /* dynamically loaded Unicode character properties -------------------------- */
38 #define UCHAR_HARDCODE_DATA 1
40 #if UCHAR_HARDCODE_DATA
42 /* uchar_props_data.c is machine-generated by genprops --csource */
43 #include "uchar_props_data.c"
49 * for a description of the file format, see icu/source/tools/genprops/store.c
51 static const char DATA_NAME
[] = "uprops";
52 static const char DATA_TYPE
[] = "icu";
54 static UDataMemory
*propsData
=NULL
;
55 static UErrorCode dataErrorCode
=U_ZERO_ERROR
;
57 static uint8_t formatVersion
[4]={ 0, 0, 0, 0 };
58 static UVersionInfo dataVersion
={ 0, 0, 0, 0 };
60 static UTrie propsTrie
={ 0 }, propsVectorsTrie
={ 0 };
61 static const uint32_t *pData32
=NULL
, *propsVectors
=NULL
;
62 static int32_t countPropsVectors
=0, propsVectorsColumns
=0;
64 static int8_t havePropsData
=0; /* == 0 -> Data has not been loaded.
65 * < 0 -> Error occured attempting to load data.
66 * > 0 -> Data has been successfully loaded.
69 /* index values loaded from uprops.dat */
70 static int32_t indexes
[UPROPS_INDEX_COUNT
];
72 static UBool U_CALLCONV
73 isAcceptable(void *context
,
74 const char *type
, const char *name
,
75 const UDataInfo
*pInfo
) {
78 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
79 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
80 pInfo
->dataFormat
[0]==0x55 && /* dataFormat="UPro" */
81 pInfo
->dataFormat
[1]==0x50 &&
82 pInfo
->dataFormat
[2]==0x72 &&
83 pInfo
->dataFormat
[3]==0x6f &&
84 pInfo
->formatVersion
[0]==4 &&
85 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
86 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
88 uprv_memcpy(formatVersion
, pInfo
->formatVersion
, 4);
89 uprv_memcpy(dataVersion
, pInfo
->dataVersion
, 4);
96 static UBool U_CALLCONV
uchar_cleanup(void)
99 udata_close(propsData
);
105 uprv_memset(dataVersion
, 0, U_MAX_VERSION_LENGTH
);
106 dataErrorCode
=U_ZERO_ERROR
;
113 UDataMemory
*propsData
;
114 UTrie propsTrie
, propsVectorsTrie
;
115 const uint32_t *pData32
;
117 typedef struct UCharProps UCharProps
;
119 /* open uprops.icu */
121 _openProps(UCharProps
*ucp
, UErrorCode
*pErrorCode
) {
125 ucp
->propsData
=udata_openChoice(NULL
, DATA_TYPE
, DATA_NAME
, isAcceptable
, NULL
, pErrorCode
);
126 if(U_FAILURE(*pErrorCode
)) {
130 ucp
->pData32
=p
=(const uint32_t *)udata_getMemory(ucp
->propsData
);
132 /* unserialize the trie; it is directly after the int32_t indexes[UPROPS_INDEX_COUNT] */
133 length
=(int32_t)p
[UPROPS_PROPS32_INDEX
]*4;
134 length
=utrie_unserialize(&ucp
->propsTrie
, (const uint8_t *)(p
+UPROPS_INDEX_COUNT
), length
-64, pErrorCode
);
135 if(U_FAILURE(*pErrorCode
)) {
139 /* unserialize the properties vectors trie */
140 length
=(int32_t)(p
[UPROPS_ADDITIONAL_VECTORS_INDEX
]-p
[UPROPS_ADDITIONAL_TRIE_INDEX
])*4;
142 length
=utrie_unserialize(&ucp
->propsVectorsTrie
, (const uint8_t *)(p
+p
[UPROPS_ADDITIONAL_TRIE_INDEX
]), length
, pErrorCode
);
144 if(length
<=0 || U_FAILURE(*pErrorCode
)) {
147 * Allow the properties vectors trie to be missing -
148 * also requires propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]
149 * to be zero so that this trie is never accessed.
151 uprv_memset(&ucp
->propsVectorsTrie
, 0, sizeof(ucp
->propsVectorsTrie
));
157 #if !UCHAR_HARDCODE_DATA
159 uprv_loadPropsData(UErrorCode
*pErrorCode
) {
160 /* load Unicode character properties data from file if necessary */
163 * This lazy initialization with double-checked locking (without mutex protection for
164 * havePropsData==0) is transiently unsafe under certain circumstances.
165 * Check the readme and use u_init() if necessary.
167 if(havePropsData
==0) {
168 UCharProps ucp
={ NULL
};
170 if(U_FAILURE(*pErrorCode
)) {
171 return havePropsData
;
174 /* open the data outside the mutex block */
175 _openProps(&ucp
, pErrorCode
);
177 if(U_SUCCESS(*pErrorCode
)) {
178 /* in the mutex block, set the data for this process */
180 if(propsData
==NULL
) {
181 propsData
=ucp
.propsData
;
185 uprv_memcpy(&propsTrie
, &ucp
.propsTrie
, sizeof(propsTrie
));
186 uprv_memcpy(&propsVectorsTrie
, &ucp
.propsVectorsTrie
, sizeof(propsVectorsTrie
));
189 /* initialize some variables */
190 uprv_memcpy(indexes
, pData32
, sizeof(indexes
));
192 /* additional properties */
193 if(indexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
]!=0) {
194 propsVectors
=pData32
+indexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
];
195 countPropsVectors
=indexes
[UPROPS_RESERVED_INDEX
]-indexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
];
196 propsVectorsColumns
=indexes
[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX
];
202 dataErrorCode
=*pErrorCode
;
205 ucln_common_registerCleanup(UCLN_COMMON_UCHAR
, uchar_cleanup
);
207 /* if a different thread set it first, then close the extra data */
208 udata_close(ucp
.propsData
); /* NULL if it was set correctly */
211 return havePropsData
;
215 loadPropsData(void) {
216 UErrorCode errorCode
= U_ZERO_ERROR
;
217 int8_t retVal
= uprv_loadPropsData(&errorCode
);
223 /* constants and macros for access to the data ------------------------------ */
225 /* getting a uint32_t properties word from the data */
226 #if UCHAR_HARDCODE_DATA
228 #define GET_PROPS(c, result) UTRIE_GET16(&propsTrie, c, result);
232 #define HAVE_DATA (havePropsData>0 || loadPropsData()>0)
233 #define GET_PROPS_UNSAFE(c, result) \
234 UTRIE_GET16(&propsTrie, c, result);
235 #define GET_PROPS(c, result) \
237 GET_PROPS_UNSAFE(c, result); \
245 uprv_haveProperties(UErrorCode
*pErrorCode
) {
246 if(U_FAILURE(*pErrorCode
)) {
249 #if !UCHAR_HARDCODE_DATA
250 if(havePropsData
==0) {
251 uprv_loadPropsData(pErrorCode
);
253 if(havePropsData
<0) {
254 *pErrorCode
=dataErrorCode
;
261 /* API functions ------------------------------------------------------------ */
263 /* Gets the Unicode character's general category.*/
264 U_CAPI
int8_t U_EXPORT2
265 u_charType(UChar32 c
) {
268 return (int8_t)GET_CATEGORY(props
);
271 /* Enumerate all code points with their general categories. */
272 struct _EnumTypeCallback
{
273 UCharEnumTypeRange
*enumRange
;
277 static uint32_t U_CALLCONV
278 _enumTypeValue(const void *context
, uint32_t value
) {
279 return GET_CATEGORY(value
);
282 static UBool U_CALLCONV
283 _enumTypeRange(const void *context
, UChar32 start
, UChar32 limit
, uint32_t value
) {
284 /* just cast the value to UCharCategory */
285 return ((struct _EnumTypeCallback
*)context
)->
286 enumRange(((struct _EnumTypeCallback
*)context
)->context
,
287 start
, limit
, (UCharCategory
)value
);
290 U_CAPI
void U_EXPORT2
291 u_enumCharTypes(UCharEnumTypeRange
*enumRange
, const void *context
) {
292 struct _EnumTypeCallback callback
;
295 #if !UCHAR_HARDCODE_DATA
302 callback
.enumRange
=enumRange
;
303 callback
.context
=context
;
304 utrie_enum(&propsTrie
, _enumTypeValue
, _enumTypeRange
, &callback
);
307 /* Checks if ch is a lower case letter.*/
308 U_CAPI UBool U_EXPORT2
309 u_islower(UChar32 c
) {
312 return (UBool
)(GET_CATEGORY(props
)==U_LOWERCASE_LETTER
);
315 /* Checks if ch is an upper case letter.*/
316 U_CAPI UBool U_EXPORT2
317 u_isupper(UChar32 c
) {
320 return (UBool
)(GET_CATEGORY(props
)==U_UPPERCASE_LETTER
);
323 /* Checks if ch is a title case letter; usually upper case letters.*/
324 U_CAPI UBool U_EXPORT2
325 u_istitle(UChar32 c
) {
328 return (UBool
)(GET_CATEGORY(props
)==U_TITLECASE_LETTER
);
331 /* Checks if ch is a decimal digit. */
332 U_CAPI UBool U_EXPORT2
333 u_isdigit(UChar32 c
) {
336 return (UBool
)(GET_CATEGORY(props
)==U_DECIMAL_DIGIT_NUMBER
);
339 U_CAPI UBool U_EXPORT2
340 u_isxdigit(UChar32 c
) {
343 /* check ASCII and Fullwidth ASCII a-fA-F */
345 (c
<=0x66 && c
>=0x41 && (c
<=0x46 || c
>=0x61)) ||
346 (c
>=0xff21 && c
<=0xff46 && (c
<=0xff26 || c
>=0xff41))
352 return (UBool
)(GET_CATEGORY(props
)==U_DECIMAL_DIGIT_NUMBER
);
355 /* Checks if the Unicode character is a letter.*/
356 U_CAPI UBool U_EXPORT2
357 u_isalpha(UChar32 c
) {
360 return (UBool
)((CAT_MASK(props
)&U_GC_L_MASK
)!=0);
363 U_CAPI UBool U_EXPORT2
364 u_isUAlphabetic(UChar32 c
) {
365 return (u_getUnicodeProperties(c
, 1)&U_MASK(UPROPS_ALPHABETIC
))!=0;
368 /* Checks if c is a letter or a decimal digit */
369 U_CAPI UBool U_EXPORT2
370 u_isalnum(UChar32 c
) {
373 return (UBool
)((CAT_MASK(props
)&(U_GC_L_MASK
|U_GC_ND_MASK
))!=0);
377 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
381 u_isalnumPOSIX(UChar32 c
) {
382 return (UBool
)(u_isUAlphabetic(c
) || u_isdigit(c
));
385 /* Checks if ch is a unicode character with assigned character type.*/
386 U_CAPI UBool U_EXPORT2
387 u_isdefined(UChar32 c
) {
390 return (UBool
)(GET_CATEGORY(props
)!=0);
393 /* Checks if the Unicode character is a base form character that can take a diacritic.*/
394 U_CAPI UBool U_EXPORT2
395 u_isbase(UChar32 c
) {
398 return (UBool
)((CAT_MASK(props
)&(U_GC_L_MASK
|U_GC_N_MASK
|U_GC_MC_MASK
|U_GC_ME_MASK
))!=0);
401 /* Checks if the Unicode character is a control character.*/
402 U_CAPI UBool U_EXPORT2
403 u_iscntrl(UChar32 c
) {
406 return (UBool
)((CAT_MASK(props
)&(U_GC_CC_MASK
|U_GC_CF_MASK
|U_GC_ZL_MASK
|U_GC_ZP_MASK
))!=0);
409 U_CAPI UBool U_EXPORT2
410 u_isISOControl(UChar32 c
) {
411 return (uint32_t)c
<=0x9f && (c
<=0x1f || c
>=0x7f);
/*
 * Some control characters that are used as space:
 * TAB..CR, 0x1c..0x1f, and NL.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example x|y) is compared as a whole instead of being split up by
 * operator precedence. The argument is still evaluated several times, so it
 * must not have side effects.
 */
#define IS_THAT_CONTROL_SPACE(c) \
    ((c)<=0x9f && (((c)>=TAB && (c)<=CR) || ((c)>=0x1c && (c)<=0x1f) || (c)==NL))
418 /* Checks if the Unicode character is a space character.*/
419 U_CAPI UBool U_EXPORT2
420 u_isspace(UChar32 c
) {
423 return (UBool
)((CAT_MASK(props
)&U_GC_Z_MASK
)!=0 || IS_THAT_CONTROL_SPACE(c
));
426 U_CAPI UBool U_EXPORT2
427 u_isJavaSpaceChar(UChar32 c
) {
430 return (UBool
)((CAT_MASK(props
)&U_GC_Z_MASK
)!=0);
433 /* Checks if the Unicode character is a whitespace character.*/
434 U_CAPI UBool U_EXPORT2
435 u_isWhitespace(UChar32 c
) {
439 ((CAT_MASK(props
)&U_GC_Z_MASK
)!=0 &&
440 c
!=NBSP
&& c
!=FIGURESP
&& c
!=NNBSP
) || /* exclude no-break spaces */
441 IS_THAT_CONTROL_SPACE(c
)
445 U_CAPI UBool U_EXPORT2
446 u_isblank(UChar32 c
) {
447 if((uint32_t)c
<=0x9f) {
448 return c
==9 || c
==0x20; /* TAB or SPACE */
453 return (UBool
)(GET_CATEGORY(props
)==U_SPACE_SEPARATOR
);
457 U_CAPI UBool U_EXPORT2
458 u_isUWhiteSpace(UChar32 c
) {
459 return (u_getUnicodeProperties(c
, 1)&U_MASK(UPROPS_WHITE_SPACE
))!=0;
462 /* Checks if the Unicode character is printable.*/
463 U_CAPI UBool U_EXPORT2
464 u_isprint(UChar32 c
) {
467 /* comparing ==0 returns FALSE for the categories mentioned */
468 return (UBool
)((CAT_MASK(props
)&U_GC_C_MASK
)==0);
472 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
473 * Implements UCHAR_POSIX_PRINT.
477 u_isprintPOSIX(UChar32 c
) {
481 * The only cntrl character in graph+blank is TAB (in blank).
482 * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
484 return (UBool
)((GET_CATEGORY(props
)==U_SPACE_SEPARATOR
) || u_isgraphPOSIX(c
));
487 U_CAPI UBool U_EXPORT2
488 u_isgraph(UChar32 c
) {
491 /* comparing ==0 returns FALSE for the categories mentioned */
492 return (UBool
)((CAT_MASK(props
)&
493 (U_GC_CC_MASK
|U_GC_CF_MASK
|U_GC_CS_MASK
|U_GC_CN_MASK
|U_GC_Z_MASK
))
499 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
500 * with space=\p{Whitespace} and Control=Cc.
501 * Implements UCHAR_POSIX_GRAPH.
505 u_isgraphPOSIX(UChar32 c
) {
508 /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
509 /* comparing ==0 returns FALSE for the categories mentioned */
510 return (UBool
)((CAT_MASK(props
)&
511 (U_GC_CC_MASK
|U_GC_CS_MASK
|U_GC_CN_MASK
|U_GC_Z_MASK
))
515 U_CAPI UBool U_EXPORT2
516 u_ispunct(UChar32 c
) {
519 return (UBool
)((CAT_MASK(props
)&U_GC_P_MASK
)!=0);
522 /* Checks if the Unicode character can start a Unicode identifier.*/
523 U_CAPI UBool U_EXPORT2
524 u_isIDStart(UChar32 c
) {
525 /* same as u_isalpha(), plus letter numbers (U_GC_NL_MASK) */
528 return (UBool
)((CAT_MASK(props
)&(U_GC_L_MASK
|U_GC_NL_MASK
))!=0);
531 /* Checks if the Unicode character can be a Unicode identifier part other than starting the
533 U_CAPI UBool U_EXPORT2
534 u_isIDPart(UChar32 c
) {
539 (U_GC_ND_MASK
|U_GC_NL_MASK
|
541 U_GC_PC_MASK
|U_GC_MC_MASK
|U_GC_MN_MASK
)
546 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
547 U_CAPI UBool U_EXPORT2
548 u_isIDIgnorable(UChar32 c
) {
550 return u_isISOControl(c
) && !IS_THAT_CONTROL_SPACE(c
);
554 return (UBool
)(GET_CATEGORY(props
)==U_FORMAT_CHAR
);
558 /*Checks if the Unicode character can start a Java identifier.*/
559 U_CAPI UBool U_EXPORT2
560 u_isJavaIDStart(UChar32 c
) {
563 return (UBool
)((CAT_MASK(props
)&(U_GC_L_MASK
|U_GC_SC_MASK
|U_GC_PC_MASK
))!=0);
566 /*Checks if the Unicode character can be a Java identifier part other than starting the
569 U_CAPI UBool U_EXPORT2
570 u_isJavaIDPart(UChar32 c
) {
575 (U_GC_ND_MASK
|U_GC_NL_MASK
|
577 U_GC_SC_MASK
|U_GC_PC_MASK
|
578 U_GC_MC_MASK
|U_GC_MN_MASK
)
583 U_CAPI
int32_t U_EXPORT2
584 u_charDigitValue(UChar32 c
) {
588 if(GET_NUMERIC_TYPE(props
)==1) {
589 return GET_NUMERIC_VALUE(props
);
595 U_CAPI
double U_EXPORT2
596 u_getNumericValue(UChar32 c
) {
597 uint32_t props
, numericType
, numericValue
;
599 numericType
=GET_NUMERIC_TYPE(props
);
601 if(numericType
==0 || numericType
>=UPROPS_NT_COUNT
) {
602 return U_NO_NUMERIC_VALUE
;
605 numericValue
=GET_NUMERIC_VALUE(props
);
607 if(numericType
<U_NT_COUNT
) {
608 /* normal type, the value is stored directly */
610 } else if(numericType
==UPROPS_NT_FRACTION
) {
613 uint32_t denominator
;
615 numerator
=(int32_t)numericValue
>>UPROPS_FRACTION_NUM_SHIFT
;
616 denominator
=(numericValue
&UPROPS_FRACTION_DEN_MASK
)+UPROPS_FRACTION_DEN_OFFSET
;
621 return (double)numerator
/(double)denominator
;
622 } else /* numericType==UPROPS_NT_LARGE */ {
623 /* large value with exponent */
627 mant
=(int32_t)numericValue
>>UPROPS_LARGE_MANT_SHIFT
;
628 exp
=(int32_t)numericValue
&UPROPS_LARGE_EXP_MASK
;
631 exp
+=UPROPS_LARGE_EXP_OFFSET_EXTRA
;
633 return U_NO_NUMERIC_VALUE
; /* reserved mantissa value */
635 exp
+=UPROPS_LARGE_EXP_OFFSET
;
640 /* multiply by 10^exp without math.h */
664 /* ICU 3.4: bidi/shaping properties moved to ubidi_props.c */
666 /* ICU 2.1: u_getCombiningClass() moved to unorm.cpp */
668 U_CAPI
int32_t U_EXPORT2
669 u_digit(UChar32 ch
, int8_t radix
) {
671 if((uint8_t)(radix
-2)<=(36-2)) {
672 value
=(int8_t)u_charDigitValue(ch
);
674 /* ch is not a decimal digit, try latin letters */
675 if(ch
>=0x61 && ch
<=0x7A) {
676 value
=(int8_t)(ch
-0x57); /* ch - 'a' + 10 */
677 } else if(ch
>=0x41 && ch
<=0x5A) {
678 value
=(int8_t)(ch
-0x37); /* ch - 'A' + 10 */
679 } else if(ch
>=0xFF41 && ch
<=0xFF5A) {
680 value
=(int8_t)(ch
-0xFF37); /* fullwidth ASCII a-z */
681 } else if(ch
>=0xFF21 && ch
<=0xFF3A) {
682 value
=(int8_t)(ch
-0xFF17); /* fullwidth ASCII A-Z */
686 value
=-1; /* invalid radix */
688 return (int8_t)((value
<radix
) ? value
: -1);
691 U_CAPI UChar32 U_EXPORT2
692 u_forDigit(int32_t digit
, int8_t radix
) {
693 if((uint8_t)(radix
-2)>(36-2) || (uint32_t)digit
>=(uint32_t)radix
) {
695 } else if(digit
<10) {
696 return (UChar32
)(0x30+digit
);
698 return (UChar32
)((0x61-10)+digit
);
702 /* miscellaneous, and support for uprops.c ---------------------------------- */
704 U_CAPI
void U_EXPORT2
705 u_getUnicodeVersion(UVersionInfo versionArray
) {
706 if(versionArray
!=NULL
) {
707 uprv_memcpy(versionArray
, dataVersion
, U_MAX_VERSION_LENGTH
);
712 u_getUnicodeProperties(UChar32 c
, int32_t column
) {
720 #if !UCHAR_HARDCODE_DATA
721 !HAVE_DATA
|| countPropsVectors
==0 ||
723 column
<0 || column
>=propsVectorsColumns
727 UTRIE_GET16(&propsVectorsTrie
, c
, vecIndex
);
728 return propsVectors
[vecIndex
+column
];
733 uprv_getMaxValues(int32_t column
) {
734 #if !UCHAR_HARDCODE_DATA
739 return indexes
[UPROPS_MAX_VALUES_INDEX
];
741 return indexes
[UPROPS_MAX_VALUES_2_INDEX
];
745 #if !UCHAR_HARDCODE_DATA
753 * get Hangul Syllable Type
754 * implemented here so that uchar.c (uhst_addPropertyStarts())
755 * does not depend on uprops.c (u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE))
757 U_CFUNC UHangulSyllableType
758 uchar_getHST(UChar32 c
) {
759 /* purely algorithmic; hardcode known characters, check for assigned new ones */
761 /* U_HST_NOT_APPLICABLE */
762 } else if(c
<=0x11ff) {
765 /* Jamo L range, HANGUL CHOSEONG ... */
766 if(c
==0x115f || c
<=0x1159 || u_charType(c
)==U_OTHER_LETTER
) {
767 return U_HST_LEADING_JAMO
;
769 } else if(c
<=0x11a7) {
770 /* Jamo V range, HANGUL JUNGSEONG ... */
771 if(c
<=0x11a2 || u_charType(c
)==U_OTHER_LETTER
) {
772 return U_HST_VOWEL_JAMO
;
776 if(c
<=0x11f9 || u_charType(c
)==U_OTHER_LETTER
) {
777 return U_HST_TRAILING_JAMO
;
780 } else if((c
-=HANGUL_BASE
)<0) {
781 /* U_HST_NOT_APPLICABLE */
782 } else if(c
<HANGUL_COUNT
) {
783 /* Hangul syllable */
784 return c%JAMO_T_COUNT
==0 ? U_HST_LV_SYLLABLE
: U_HST_LVT_SYLLABLE
;
786 return U_HST_NOT_APPLICABLE
;
789 U_CAPI
void U_EXPORT2
790 u_charAge(UChar32 c
, UVersionInfo versionArray
) {
791 if(versionArray
!=NULL
) {
792 uint32_t version
=u_getUnicodeProperties(c
, 0)>>UPROPS_AGE_SHIFT
;
793 versionArray
[0]=(uint8_t)(version
>>4);
794 versionArray
[1]=(uint8_t)(version
&0xf);
795 versionArray
[2]=versionArray
[3]=0;
799 U_CAPI UScriptCode U_EXPORT2
800 uscript_getScript(UChar32 c
, UErrorCode
*pErrorCode
) {
801 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
802 return USCRIPT_INVALID_CODE
;
804 if((uint32_t)c
>0x10ffff) {
805 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
806 return USCRIPT_INVALID_CODE
;
809 return (UScriptCode
)(u_getUnicodeProperties(c
, 0)&UPROPS_SCRIPT_MASK
);
812 U_CAPI UBlockCode U_EXPORT2
813 ublock_getCode(UChar32 c
) {
814 return (UBlockCode
)((u_getUnicodeProperties(c
, 0)&UPROPS_BLOCK_MASK
)>>UPROPS_BLOCK_SHIFT
);
817 /* property starts for UnicodeSet ------------------------------------------- */
819 /* for Hangul_Syllable_Type */
820 U_CFUNC
void U_EXPORT2
821 uhst_addPropertyStarts(const USetAdder
*sa
, UErrorCode
*pErrorCode
) {
823 int32_t value
, value2
;
825 if(U_FAILURE(*pErrorCode
)) {
829 #if !UCHAR_HARDCODE_DATA
831 *pErrorCode
=dataErrorCode
;
836 /* add code points with hardcoded properties, plus the ones following them */
839 * Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
840 * First, we add fixed boundaries for the blocks of Jamos.
841 * Then we check in loops to see where the current Unicode version
842 * actually stops assigning such Jamos. We start each loop
843 * at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
844 * (These have not changed since Unicode 2.)
846 sa
->add(sa
->set
, 0x1100);
847 value
=U_HST_LEADING_JAMO
;
848 for(c
=0x115a; c
<=0x115f; ++c
) {
849 value2
=uchar_getHST(c
);
856 sa
->add(sa
->set
, 0x1160);
857 value
=U_HST_VOWEL_JAMO
;
858 for(c
=0x11a3; c
<=0x11a7; ++c
) {
859 value2
=uchar_getHST(c
);
866 sa
->add(sa
->set
, 0x11a8);
867 value
=U_HST_TRAILING_JAMO
;
868 for(c
=0x11fa; c
<=0x11ff; ++c
) {
869 value2
=uchar_getHST(c
);
876 /* Add Hangul type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE. */
877 for(c
=HANGUL_BASE
; c
<(HANGUL_BASE
+HANGUL_COUNT
); c
+=JAMO_T_COUNT
) {
879 sa
->add(sa
->set
, c
+1);
884 static UBool U_CALLCONV
885 _enumPropertyStartsRange(const void *context
, UChar32 start
, UChar32 limit
, uint32_t value
) {
886 /* add the start code point to the USet */
887 const USetAdder
*sa
=(const USetAdder
*)context
;
888 sa
->add(sa
->set
, start
);
/*
 * Adds the code point cp and its successor cp+1 through the USetAdder sa.
 *
 * Wrapped in do { } while(0) so that both add() calls form one statement;
 * as two bare statements, only the first call would be controlled by an
 * unbraced if/else or loop. Both arguments are parenthesized. They are
 * evaluated more than once, so they must not have side effects.
 */
#define USET_ADD_CP_AND_NEXT(sa, cp) \
    do { \
        (sa)->add((sa)->set, (cp)); \
        (sa)->add((sa)->set, (cp)+1); \
    } while(0)
894 U_CFUNC
void U_EXPORT2
895 uchar_addPropertyStarts(const USetAdder
*sa
, UErrorCode
*pErrorCode
) {
896 if(U_FAILURE(*pErrorCode
)) {
900 #if !UCHAR_HARDCODE_DATA
902 *pErrorCode
=dataErrorCode
;
907 /* add the start code point of each same-value range of the main trie */
908 utrie_enum(&propsTrie
, NULL
, _enumPropertyStartsRange
, sa
);
910 /* add code points with hardcoded properties, plus the ones following them */
912 /* add for u_isblank() */
913 USET_ADD_CP_AND_NEXT(sa
, TAB
);
915 /* add for IS_THAT_CONTROL_SPACE() */
916 sa
->add(sa
->set
, CR
+1); /* range TAB..CR */
917 sa
->add(sa
->set
, 0x1c);
918 sa
->add(sa
->set
, 0x1f+1);
919 USET_ADD_CP_AND_NEXT(sa
, NL
);
921 /* add for u_isIDIgnorable() what was not added above */
922 sa
->add(sa
->set
, DEL
); /* range DEL..NBSP-1, NBSP added below */
923 sa
->add(sa
->set
, HAIRSP
);
924 sa
->add(sa
->set
, RLM
+1);
925 sa
->add(sa
->set
, INHSWAP
);
926 sa
->add(sa
->set
, NOMDIG
+1);
927 USET_ADD_CP_AND_NEXT(sa
, ZWNBSP
);
929 /* add no-break spaces for u_isWhitespace() what was not added above */
930 USET_ADD_CP_AND_NEXT(sa
, NBSP
);
931 USET_ADD_CP_AND_NEXT(sa
, FIGURESP
);
932 USET_ADD_CP_AND_NEXT(sa
, NNBSP
);
934 /* add for u_digit() */
935 sa
->add(sa
->set
, U_a
);
936 sa
->add(sa
->set
, U_z
+1);
937 sa
->add(sa
->set
, U_A
);
938 sa
->add(sa
->set
, U_Z
+1);
939 sa
->add(sa
->set
, U_FW_a
);
940 sa
->add(sa
->set
, U_FW_z
+1);
941 sa
->add(sa
->set
, U_FW_A
);
942 sa
->add(sa
->set
, U_FW_Z
+1);
944 /* add for u_isxdigit() */
945 sa
->add(sa
->set
, U_f
+1);
946 sa
->add(sa
->set
, U_F
+1);
947 sa
->add(sa
->set
, U_FW_f
+1);
948 sa
->add(sa
->set
, U_FW_F
+1);
950 /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
951 sa
->add(sa
->set
, WJ
); /* range WJ..NOMDIG */
952 sa
->add(sa
->set
, 0xfff0);
953 sa
->add(sa
->set
, 0xfffb+1);
954 sa
->add(sa
->set
, 0xe0000);
955 sa
->add(sa
->set
, 0xe0fff+1);
957 /* add for UCHAR_GRAPHEME_BASE and others */
958 USET_ADD_CP_AND_NEXT(sa
, CGJ
);
961 U_CFUNC
void U_EXPORT2
962 upropsvec_addPropertyStarts(const USetAdder
*sa
, UErrorCode
*pErrorCode
) {
963 if(U_FAILURE(*pErrorCode
)) {
967 #if !UCHAR_HARDCODE_DATA
969 *pErrorCode
=dataErrorCode
;
974 /* add the start code point of each same-value range of the properties vectors trie */
975 if(propsVectorsColumns
>0) {
976 /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
977 utrie_enum(&propsVectorsTrie
, NULL
, _enumPropertyStartsRange
, sa
);