// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2002-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uprops.cpp
*   tab size:   8 (not used)
*
*   created on: 2002feb24
*   created by: Markus W. Scherer
*
*   Implementations for mostly non-core Unicode character properties
*   stored in uprops.icu.
*
*   With the APIs implemented here, almost all properties files and
*   their associated implementation files are used from this file,
*   including those for normalization and case mappings.
*/

26 #include "unicode/utypes.h"
27 #include "unicode/uchar.h"
28 #include "unicode/ucptrie.h"
29 #include "unicode/udata.h"
30 #include "unicode/unorm2.h"
31 #include "unicode/uscript.h"
32 #include "unicode/ustring.h"
35 #include "normalizer2impl.h"
37 #include "ubidi_props.h"
41 #include "ulayout_props.h"
46 // Unicode text layout properties data -----------------------------------------
50 icu::UInitOnce gLayoutInitOnce
= U_INITONCE_INITIALIZER
;
51 UDataMemory
*gLayoutMemory
= nullptr;
53 UCPTrie
*gInpcTrie
= nullptr; // Indic_Positional_Category
54 UCPTrie
*gInscTrie
= nullptr; // Indic_Syllabic_Category
55 UCPTrie
*gVoTrie
= nullptr; // Vertical_Orientation
57 int32_t gMaxInpcValue
= 0;
58 int32_t gMaxInscValue
= 0;
59 int32_t gMaxVoValue
= 0;
61 UBool U_CALLCONV
uprops_cleanup() {
62 udata_close(gLayoutMemory
);
63 gLayoutMemory
= nullptr;
65 ucptrie_close(gInpcTrie
);
67 ucptrie_close(gInscTrie
);
69 ucptrie_close(gVoTrie
);
76 gLayoutInitOnce
.reset();
81 ulayout_isAcceptable(void * /*context*/,
82 const char * /* type */, const char * /*name*/,
83 const UDataInfo
*pInfo
) {
84 return pInfo
->size
>= 20 &&
85 pInfo
->isBigEndian
== U_IS_BIG_ENDIAN
&&
86 pInfo
->charsetFamily
== U_CHARSET_FAMILY
&&
87 pInfo
->dataFormat
[0] == ULAYOUT_FMT_0
&&
88 pInfo
->dataFormat
[1] == ULAYOUT_FMT_1
&&
89 pInfo
->dataFormat
[2] == ULAYOUT_FMT_2
&&
90 pInfo
->dataFormat
[3] == ULAYOUT_FMT_3
&&
91 pInfo
->formatVersion
[0] == 1;
94 // UInitOnce singleton initialization function
95 void U_CALLCONV
ulayout_load(UErrorCode
&errorCode
) {
96 gLayoutMemory
= udata_openChoice(
97 nullptr, ULAYOUT_DATA_TYPE
, ULAYOUT_DATA_NAME
,
98 ulayout_isAcceptable
, nullptr, &errorCode
);
99 if (U_FAILURE(errorCode
)) { return; }
101 const uint8_t *inBytes
= (const uint8_t *)udata_getMemory(gLayoutMemory
);
102 const int32_t *inIndexes
= (const int32_t *)inBytes
;
103 int32_t indexesLength
= inIndexes
[ULAYOUT_IX_INDEXES_LENGTH
];
104 if (indexesLength
< 12) {
105 errorCode
= U_INVALID_FORMAT_ERROR
; // Not enough indexes.
108 int32_t offset
= indexesLength
* 4;
109 int32_t top
= inIndexes
[ULAYOUT_IX_INPC_TRIE_TOP
];
110 int32_t trieSize
= top
- offset
;
111 if (trieSize
>= 16) {
112 gInpcTrie
= ucptrie_openFromBinary(
113 UCPTRIE_TYPE_ANY
, UCPTRIE_VALUE_BITS_ANY
,
114 inBytes
+ offset
, trieSize
, nullptr, &errorCode
);
117 top
= inIndexes
[ULAYOUT_IX_INSC_TRIE_TOP
];
118 trieSize
= top
- offset
;
119 if (trieSize
>= 16) {
120 gInscTrie
= ucptrie_openFromBinary(
121 UCPTRIE_TYPE_ANY
, UCPTRIE_VALUE_BITS_ANY
,
122 inBytes
+ offset
, trieSize
, nullptr, &errorCode
);
125 top
= inIndexes
[ULAYOUT_IX_VO_TRIE_TOP
];
126 trieSize
= top
- offset
;
127 if (trieSize
>= 16) {
128 gVoTrie
= ucptrie_openFromBinary(
129 UCPTRIE_TYPE_ANY
, UCPTRIE_VALUE_BITS_ANY
,
130 inBytes
+ offset
, trieSize
, nullptr, &errorCode
);
133 uint32_t maxValues
= inIndexes
[ULAYOUT_IX_MAX_VALUES
];
134 gMaxInpcValue
= maxValues
>> ULAYOUT_MAX_INPC_SHIFT
;
135 gMaxInscValue
= (maxValues
>> ULAYOUT_MAX_INSC_SHIFT
) & 0xff;
136 gMaxVoValue
= (maxValues
>> ULAYOUT_MAX_VO_SHIFT
) & 0xff;
138 ucln_common_registerCleanup(UCLN_COMMON_UPROPS
, uprops_cleanup
);
141 UBool
ulayout_ensureData(UErrorCode
&errorCode
) {
142 if (U_FAILURE(errorCode
)) { return FALSE
; }
143 umtx_initOnce(gLayoutInitOnce
, &ulayout_load
, errorCode
);
144 return U_SUCCESS(errorCode
);
147 UBool
ulayout_ensureData() {
148 UErrorCode errorCode
= U_ZERO_ERROR
;
149 return ulayout_ensureData(errorCode
);
154 /* general properties API functions ----------------------------------------- */
156 struct BinaryProperty
;
158 typedef UBool
BinaryPropertyContains(const BinaryProperty
&prop
, UChar32 c
, UProperty which
);
160 struct BinaryProperty
{
161 int32_t column
; // SRC_PROPSVEC column, or "source" if mask==0
163 BinaryPropertyContains
*contains
;
166 static UBool
defaultContains(const BinaryProperty
&prop
, UChar32 c
, UProperty
/*which*/) {
167 /* systematic, directly stored properties */
168 return (u_getUnicodeProperties(c
, prop
.column
)&prop
.mask
)!=0;
171 static UBool
caseBinaryPropertyContains(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty which
) {
172 return static_cast<UBool
>(ucase_hasBinaryProperty(c
, which
));
175 static UBool
isBidiControl(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
176 return ubidi_isBidiControl(c
);
179 static UBool
isMirrored(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
180 return ubidi_isMirrored(c
);
183 static UBool
isJoinControl(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
184 return ubidi_isJoinControl(c
);
187 #if UCONFIG_NO_NORMALIZATION
188 static UBool
hasFullCompositionExclusion(const BinaryProperty
&, UChar32
, UProperty
) {
192 static UBool
hasFullCompositionExclusion(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
193 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
194 UErrorCode errorCode
=U_ZERO_ERROR
;
195 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
196 return U_SUCCESS(errorCode
) && impl
->isCompNo(impl
->getNorm16(c
));
200 // UCHAR_NF*_INERT properties
201 #if UCONFIG_NO_NORMALIZATION
202 static UBool
isNormInert(const BinaryProperty
&, UChar32
, UProperty
) {
206 static UBool
isNormInert(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty which
) {
207 UErrorCode errorCode
=U_ZERO_ERROR
;
208 const Normalizer2
*norm2
=Normalizer2Factory::getInstance(
209 (UNormalizationMode
)(which
-UCHAR_NFD_INERT
+UNORM_NFD
), errorCode
);
210 return U_SUCCESS(errorCode
) && norm2
->isInert(c
);
214 #if UCONFIG_NO_NORMALIZATION
215 static UBool
changesWhenCasefolded(const BinaryProperty
&, UChar32
, UProperty
) {
219 static UBool
changesWhenCasefolded(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
221 UErrorCode errorCode
=U_ZERO_ERROR
;
222 const Normalizer2
*nfcNorm2
=Normalizer2::getNFCInstance(errorCode
);
223 if(U_FAILURE(errorCode
)) {
226 if(nfcNorm2
->getDecomposition(c
, nfd
)) {
227 /* c has a decomposition */
228 if(nfd
.length()==1) {
229 c
=nfd
[0]; /* single BMP code point */
230 } else if(nfd
.length()<=U16_MAX_LENGTH
&&
231 nfd
.length()==U16_LENGTH(c
=nfd
.char32At(0))
233 /* single supplementary code point */
238 return FALSE
; /* protect against bad input */
241 /* single code point */
242 const UChar
*resultString
;
243 return (UBool
)(ucase_toFullFolding(c
, &resultString
, U_FOLD_CASE_DEFAULT
)>=0);
245 /* guess some large but stack-friendly capacity */
246 UChar dest
[2*UCASE_MAX_STRING_LENGTH
];
248 destLength
=u_strFoldCase(dest
, UPRV_LENGTHOF(dest
),
249 nfd
.getBuffer(), nfd
.length(),
250 U_FOLD_CASE_DEFAULT
, &errorCode
);
251 return (UBool
)(U_SUCCESS(errorCode
) &&
252 0!=u_strCompare(nfd
.getBuffer(), nfd
.length(),
253 dest
, destLength
, FALSE
));
258 #if UCONFIG_NO_NORMALIZATION
259 static UBool
changesWhenNFKC_Casefolded(const BinaryProperty
&, UChar32
, UProperty
) {
263 static UBool
changesWhenNFKC_Casefolded(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
264 UErrorCode errorCode
=U_ZERO_ERROR
;
265 const Normalizer2Impl
*kcf
=Normalizer2Factory::getNFKC_CFImpl(errorCode
);
266 if(U_FAILURE(errorCode
)) {
269 UnicodeString
src(c
);
272 // The ReorderingBuffer must be in a block because its destructor
273 // needs to release dest's buffer before we look at its contents.
274 ReorderingBuffer
buffer(*kcf
, dest
);
275 // Small destCapacity for NFKC_CF(c).
276 if(buffer
.init(5, errorCode
)) {
277 const UChar
*srcArray
=src
.getBuffer();
278 kcf
->compose(srcArray
, srcArray
+src
.length(), FALSE
,
279 TRUE
, buffer
, errorCode
);
282 return U_SUCCESS(errorCode
) && dest
!=src
;
286 #if UCONFIG_NO_NORMALIZATION
287 static UBool
isCanonSegmentStarter(const BinaryProperty
&, UChar32
, UProperty
) {
291 static UBool
isCanonSegmentStarter(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
292 UErrorCode errorCode
=U_ZERO_ERROR
;
293 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
295 U_SUCCESS(errorCode
) && impl
->ensureCanonIterData(errorCode
) &&
296 impl
->isCanonSegmentStarter(c
);
300 static UBool
isPOSIX_alnum(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
301 return u_isalnumPOSIX(c
);
304 static UBool
isPOSIX_blank(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
308 static UBool
isPOSIX_graph(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
309 return u_isgraphPOSIX(c
);
312 static UBool
isPOSIX_print(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
313 return u_isprintPOSIX(c
);
316 static UBool
isPOSIX_xdigit(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
317 return u_isxdigit(c
);
320 static UBool
isRegionalIndicator(const BinaryProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
321 // Property starts are a subset of lb=RI etc.
322 return 0x1F1E6<=c
&& c
<=0x1F1FF;
325 static const BinaryProperty binProps
[UCHAR_BINARY_LIMIT
]={
327 * column and mask values for binary properties from u_getUnicodeProperties().
328 * Must be in order of corresponding UProperty,
329 * and there must be exactly one entry per binary UProperty.
331 * Properties with mask==0 are handled in code.
332 * For them, column is the UPropertySource value.
334 { 1, U_MASK(UPROPS_ALPHABETIC
), defaultContains
},
335 { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT
), defaultContains
},
336 { UPROPS_SRC_BIDI
, 0, isBidiControl
},
337 { UPROPS_SRC_BIDI
, 0, isMirrored
},
338 { 1, U_MASK(UPROPS_DASH
), defaultContains
},
339 { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT
), defaultContains
},
340 { 1, U_MASK(UPROPS_DEPRECATED
), defaultContains
},
341 { 1, U_MASK(UPROPS_DIACRITIC
), defaultContains
},
342 { 1, U_MASK(UPROPS_EXTENDER
), defaultContains
},
343 { UPROPS_SRC_NFC
, 0, hasFullCompositionExclusion
},
344 { 1, U_MASK(UPROPS_GRAPHEME_BASE
), defaultContains
},
345 { 1, U_MASK(UPROPS_GRAPHEME_EXTEND
), defaultContains
},
346 { 1, U_MASK(UPROPS_GRAPHEME_LINK
), defaultContains
},
347 { 1, U_MASK(UPROPS_HEX_DIGIT
), defaultContains
},
348 { 1, U_MASK(UPROPS_HYPHEN
), defaultContains
},
349 { 1, U_MASK(UPROPS_ID_CONTINUE
), defaultContains
},
350 { 1, U_MASK(UPROPS_ID_START
), defaultContains
},
351 { 1, U_MASK(UPROPS_IDEOGRAPHIC
), defaultContains
},
352 { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR
), defaultContains
},
353 { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR
), defaultContains
},
354 { UPROPS_SRC_BIDI
, 0, isJoinControl
},
355 { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION
), defaultContains
},
356 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_LOWERCASE
357 { 1, U_MASK(UPROPS_MATH
), defaultContains
},
358 { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT
), defaultContains
},
359 { 1, U_MASK(UPROPS_QUOTATION_MARK
), defaultContains
},
360 { 1, U_MASK(UPROPS_RADICAL
), defaultContains
},
361 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_SOFT_DOTTED
362 { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION
), defaultContains
},
363 { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH
), defaultContains
},
364 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_UPPERCASE
365 { 1, U_MASK(UPROPS_WHITE_SPACE
), defaultContains
},
366 { 1, U_MASK(UPROPS_XID_CONTINUE
), defaultContains
},
367 { 1, U_MASK(UPROPS_XID_START
), defaultContains
},
368 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_CASE_SENSITIVE
369 { 1, U_MASK(UPROPS_S_TERM
), defaultContains
},
370 { 1, U_MASK(UPROPS_VARIATION_SELECTOR
), defaultContains
},
371 { UPROPS_SRC_NFC
, 0, isNormInert
}, // UCHAR_NFD_INERT
372 { UPROPS_SRC_NFKC
, 0, isNormInert
}, // UCHAR_NFKD_INERT
373 { UPROPS_SRC_NFC
, 0, isNormInert
}, // UCHAR_NFC_INERT
374 { UPROPS_SRC_NFKC
, 0, isNormInert
}, // UCHAR_NFKC_INERT
375 { UPROPS_SRC_NFC_CANON_ITER
, 0, isCanonSegmentStarter
},
376 { 1, U_MASK(UPROPS_PATTERN_SYNTAX
), defaultContains
},
377 { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE
), defaultContains
},
378 { UPROPS_SRC_CHAR_AND_PROPSVEC
, 0, isPOSIX_alnum
},
379 { UPROPS_SRC_CHAR
, 0, isPOSIX_blank
},
380 { UPROPS_SRC_CHAR
, 0, isPOSIX_graph
},
381 { UPROPS_SRC_CHAR
, 0, isPOSIX_print
},
382 { UPROPS_SRC_CHAR
, 0, isPOSIX_xdigit
},
383 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_CASED
384 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_CASE_IGNORABLE
385 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_CHANGES_WHEN_LOWERCASED
386 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_CHANGES_WHEN_UPPERCASED
387 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_CHANGES_WHEN_TITLECASED
388 { UPROPS_SRC_CASE_AND_NORM
, 0, changesWhenCasefolded
},
389 { UPROPS_SRC_CASE
, 0, caseBinaryPropertyContains
}, // UCHAR_CHANGES_WHEN_CASEMAPPED
390 { UPROPS_SRC_NFKC_CF
, 0, changesWhenNFKC_Casefolded
},
391 { 2, U_MASK(UPROPS_2_EMOJI
), defaultContains
},
392 { 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION
), defaultContains
},
393 { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER
), defaultContains
},
394 { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE
), defaultContains
},
395 { 2, U_MASK(UPROPS_2_EMOJI_COMPONENT
), defaultContains
},
396 { 2, 0, isRegionalIndicator
},
397 { 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK
), defaultContains
},
398 { 2, U_MASK(UPROPS_2_EXTENDED_PICTOGRAPHIC
), defaultContains
},
401 U_CAPI UBool U_EXPORT2
402 u_hasBinaryProperty(UChar32 c
, UProperty which
) {
403 /* c is range-checked in the functions that are called from here */
404 if(which
<UCHAR_BINARY_START
|| UCHAR_BINARY_LIMIT
<=which
) {
405 /* not a known binary property */
408 const BinaryProperty
&prop
=binProps
[which
];
409 return prop
.contains(prop
, c
, which
);
413 // Apple-only specific version of the above
414 U_CAPI UBool U_EXPORT2
415 u_isEmoji(UChar32 c
) {
416 const BinaryProperty
&prop
=binProps
[UCHAR_EMOJI
];
417 return prop
.contains(prop
, c
, UCHAR_EMOJI
);
422 typedef int32_t IntPropertyGetValue(const IntProperty
&prop
, UChar32 c
, UProperty which
);
423 typedef int32_t IntPropertyGetMaxValue(const IntProperty
&prop
, UProperty which
);
426 int32_t column
; // SRC_PROPSVEC column, or "source" if mask==0
428 int32_t shift
; // =maxValue if getMaxValueFromShift() is used
429 IntPropertyGetValue
*getValue
;
430 IntPropertyGetMaxValue
*getMaxValue
;
433 static int32_t defaultGetValue(const IntProperty
&prop
, UChar32 c
, UProperty
/*which*/) {
434 /* systematic, directly stored properties */
435 return (int32_t)(u_getUnicodeProperties(c
, prop
.column
)&prop
.mask
)>>prop
.shift
;
438 static int32_t defaultGetMaxValue(const IntProperty
&prop
, UProperty
/*which*/) {
439 return (uprv_getMaxValues(prop
.column
)&prop
.mask
)>>prop
.shift
;
442 static int32_t getMaxValueFromShift(const IntProperty
&prop
, UProperty
/*which*/) {
446 static int32_t getBiDiClass(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
447 return (int32_t)u_charDirection(c
);
450 static int32_t getBiDiPairedBracketType(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
451 return (int32_t)ubidi_getPairedBracketType(c
);
454 static int32_t biDiGetMaxValue(const IntProperty
&/*prop*/, UProperty which
) {
455 return ubidi_getMaxValue(which
);
458 #if UCONFIG_NO_NORMALIZATION
459 static int32_t getCombiningClass(const IntProperty
&, UChar32
, UProperty
) {
463 static int32_t getCombiningClass(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
464 return u_getCombiningClass(c
);
468 static int32_t getGeneralCategory(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
469 return (int32_t)u_charType(c
);
472 static int32_t getJoiningGroup(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
473 return ubidi_getJoiningGroup(c
);
476 static int32_t getJoiningType(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
477 return ubidi_getJoiningType(c
);
480 static int32_t getNumericType(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
481 int32_t ntv
=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c
));
482 return UPROPS_NTV_GET_TYPE(ntv
);
485 static int32_t getScript(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
486 UErrorCode errorCode
=U_ZERO_ERROR
;
487 return (int32_t)uscript_getScript(c
, &errorCode
);
490 static int32_t scriptGetMaxValue(const IntProperty
&/*prop*/, UProperty
/*which*/) {
491 uint32_t scriptX
=uprv_getMaxValues(0)&UPROPS_SCRIPT_X_MASK
;
492 return uprops_mergeScriptCodeOrIndex(scriptX
);
496 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
497 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
499 static const UHangulSyllableType gcbToHst
[]={
500 U_HST_NOT_APPLICABLE
, /* U_GCB_OTHER */
501 U_HST_NOT_APPLICABLE
, /* U_GCB_CONTROL */
502 U_HST_NOT_APPLICABLE
, /* U_GCB_CR */
503 U_HST_NOT_APPLICABLE
, /* U_GCB_EXTEND */
504 U_HST_LEADING_JAMO
, /* U_GCB_L */
505 U_HST_NOT_APPLICABLE
, /* U_GCB_LF */
506 U_HST_LV_SYLLABLE
, /* U_GCB_LV */
507 U_HST_LVT_SYLLABLE
, /* U_GCB_LVT */
508 U_HST_TRAILING_JAMO
, /* U_GCB_T */
509 U_HST_VOWEL_JAMO
/* U_GCB_V */
511 * Omit GCB values beyond what we need for hst.
512 * The code below checks for the array length.
516 static int32_t getHangulSyllableType(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
517 /* see comments on gcbToHst[] above */
518 int32_t gcb
=(int32_t)(u_getUnicodeProperties(c
, 2)&UPROPS_GCB_MASK
)>>UPROPS_GCB_SHIFT
;
519 if(gcb
<UPRV_LENGTHOF(gcbToHst
)) {
520 return gcbToHst
[gcb
];
522 return U_HST_NOT_APPLICABLE
;
526 #if UCONFIG_NO_NORMALIZATION
527 static int32_t getNormQuickCheck(const IntProperty
&, UChar32
, UProperty
) {
531 static int32_t getNormQuickCheck(const IntProperty
&/*prop*/, UChar32 c
, UProperty which
) {
532 return (int32_t)unorm_getQuickCheck(c
, (UNormalizationMode
)(which
-UCHAR_NFD_QUICK_CHECK
+UNORM_NFD
));
536 #if UCONFIG_NO_NORMALIZATION
537 static int32_t getLeadCombiningClass(const IntProperty
&, UChar32
, UProperty
) {
541 static int32_t getLeadCombiningClass(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
542 return unorm_getFCD16(c
)>>8;
546 #if UCONFIG_NO_NORMALIZATION
547 static int32_t getTrailCombiningClass(const IntProperty
&, UChar32
, UProperty
) {
551 static int32_t getTrailCombiningClass(const IntProperty
&/*prop*/, UChar32 c
, UProperty
/*which*/) {
552 return unorm_getFCD16(c
)&0xff;
556 static int32_t getInPC(const IntProperty
&, UChar32 c
, UProperty
) {
557 return ulayout_ensureData() && gInpcTrie
!= nullptr ? ucptrie_get(gInpcTrie
, c
) : 0;
560 static int32_t getInSC(const IntProperty
&, UChar32 c
, UProperty
) {
561 return ulayout_ensureData() && gInscTrie
!= nullptr ? ucptrie_get(gInscTrie
, c
) : 0;
564 static int32_t getVo(const IntProperty
&, UChar32 c
, UProperty
) {
565 return ulayout_ensureData() && gVoTrie
!= nullptr ? ucptrie_get(gVoTrie
, c
) : 0;
568 static int32_t layoutGetMaxValue(const IntProperty
&/*prop*/, UProperty which
) {
569 if (!ulayout_ensureData()) { return 0; }
571 case UCHAR_INDIC_POSITIONAL_CATEGORY
:
572 return gMaxInpcValue
;
573 case UCHAR_INDIC_SYLLABIC_CATEGORY
:
574 return gMaxInscValue
;
575 case UCHAR_VERTICAL_ORIENTATION
:
582 static const IntProperty intProps
[UCHAR_INT_LIMIT
-UCHAR_INT_START
]={
584 * column, mask and shift values for int-value properties from u_getUnicodeProperties().
585 * Must be in order of corresponding UProperty,
586 * and there must be exactly one entry per int UProperty.
588 * Properties with mask==0 are handled in code.
589 * For them, column is the UPropertySource value.
591 { UPROPS_SRC_BIDI
, 0, 0, getBiDiClass
, biDiGetMaxValue
},
592 { 0, UPROPS_BLOCK_MASK
, UPROPS_BLOCK_SHIFT
, defaultGetValue
, defaultGetMaxValue
},
593 { UPROPS_SRC_NFC
, 0, 0xff, getCombiningClass
, getMaxValueFromShift
},
594 { 2, UPROPS_DT_MASK
, 0, defaultGetValue
, defaultGetMaxValue
},
595 { 0, UPROPS_EA_MASK
, UPROPS_EA_SHIFT
, defaultGetValue
, defaultGetMaxValue
},
596 { UPROPS_SRC_CHAR
, 0, (int32_t)U_CHAR_CATEGORY_COUNT
-1,getGeneralCategory
, getMaxValueFromShift
},
597 { UPROPS_SRC_BIDI
, 0, 0, getJoiningGroup
, biDiGetMaxValue
},
598 { UPROPS_SRC_BIDI
, 0, 0, getJoiningType
, biDiGetMaxValue
},
599 { 2, UPROPS_LB_MASK
, UPROPS_LB_SHIFT
, defaultGetValue
, defaultGetMaxValue
},
600 { UPROPS_SRC_CHAR
, 0, (int32_t)U_NT_COUNT
-1, getNumericType
, getMaxValueFromShift
},
601 { UPROPS_SRC_PROPSVEC
, 0, 0, getScript
, scriptGetMaxValue
},
602 { UPROPS_SRC_PROPSVEC
, 0, (int32_t)U_HST_COUNT
-1, getHangulSyllableType
, getMaxValueFromShift
},
603 // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
604 { UPROPS_SRC_NFC
, 0, (int32_t)UNORM_YES
, getNormQuickCheck
, getMaxValueFromShift
},
605 // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
606 { UPROPS_SRC_NFKC
, 0, (int32_t)UNORM_YES
, getNormQuickCheck
, getMaxValueFromShift
},
607 // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
608 { UPROPS_SRC_NFC
, 0, (int32_t)UNORM_MAYBE
, getNormQuickCheck
, getMaxValueFromShift
},
609 // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
610 { UPROPS_SRC_NFKC
, 0, (int32_t)UNORM_MAYBE
, getNormQuickCheck
, getMaxValueFromShift
},
611 { UPROPS_SRC_NFC
, 0, 0xff, getLeadCombiningClass
, getMaxValueFromShift
},
612 { UPROPS_SRC_NFC
, 0, 0xff, getTrailCombiningClass
, getMaxValueFromShift
},
613 { 2, UPROPS_GCB_MASK
, UPROPS_GCB_SHIFT
, defaultGetValue
, defaultGetMaxValue
},
614 { 2, UPROPS_SB_MASK
, UPROPS_SB_SHIFT
, defaultGetValue
, defaultGetMaxValue
},
615 { 2, UPROPS_WB_MASK
, UPROPS_WB_SHIFT
, defaultGetValue
, defaultGetMaxValue
},
616 { UPROPS_SRC_BIDI
, 0, 0, getBiDiPairedBracketType
, biDiGetMaxValue
},
617 { UPROPS_SRC_INPC
, 0, 0, getInPC
, layoutGetMaxValue
},
618 { UPROPS_SRC_INSC
, 0, 0, getInSC
, layoutGetMaxValue
},
619 { UPROPS_SRC_VO
, 0, 0, getVo
, layoutGetMaxValue
},
622 U_CAPI
int32_t U_EXPORT2
623 u_getIntPropertyValue(UChar32 c
, UProperty which
) {
624 if(which
<UCHAR_INT_START
) {
625 if(UCHAR_BINARY_START
<=which
&& which
<UCHAR_BINARY_LIMIT
) {
626 const BinaryProperty
&prop
=binProps
[which
];
627 return prop
.contains(prop
, c
, which
);
629 } else if(which
<UCHAR_INT_LIMIT
) {
630 const IntProperty
&prop
=intProps
[which
-UCHAR_INT_START
];
631 return prop
.getValue(prop
, c
, which
);
632 } else if(which
==UCHAR_GENERAL_CATEGORY_MASK
) {
633 return U_MASK(u_charType(c
));
635 return 0; // undefined
638 U_CAPI
int32_t U_EXPORT2
639 u_getIntPropertyMinValue(UProperty
/*which*/) {
640 return 0; /* all binary/enum/int properties have a minimum value of 0 */
643 U_CAPI
int32_t U_EXPORT2
644 u_getIntPropertyMaxValue(UProperty which
) {
645 if(which
<UCHAR_INT_START
) {
646 if(UCHAR_BINARY_START
<=which
&& which
<UCHAR_BINARY_LIMIT
) {
647 return 1; // maximum TRUE for all binary properties
649 } else if(which
<UCHAR_INT_LIMIT
) {
650 const IntProperty
&prop
=intProps
[which
-UCHAR_INT_START
];
651 return prop
.getMaxValue(prop
, which
);
653 return -1; // undefined
656 U_CFUNC UPropertySource U_EXPORT2
657 uprops_getSource(UProperty which
) {
658 if(which
<UCHAR_BINARY_START
) {
659 return UPROPS_SRC_NONE
; /* undefined */
660 } else if(which
<UCHAR_BINARY_LIMIT
) {
661 const BinaryProperty
&prop
=binProps
[which
];
663 return UPROPS_SRC_PROPSVEC
;
665 return (UPropertySource
)prop
.column
;
667 } else if(which
<UCHAR_INT_START
) {
668 return UPROPS_SRC_NONE
; /* undefined */
669 } else if(which
<UCHAR_INT_LIMIT
) {
670 const IntProperty
&prop
=intProps
[which
-UCHAR_INT_START
];
672 return UPROPS_SRC_PROPSVEC
;
674 return (UPropertySource
)prop
.column
;
676 } else if(which
<UCHAR_STRING_START
) {
678 case UCHAR_GENERAL_CATEGORY_MASK
:
679 case UCHAR_NUMERIC_VALUE
:
680 return UPROPS_SRC_CHAR
;
683 return UPROPS_SRC_NONE
;
685 } else if(which
<UCHAR_STRING_LIMIT
) {
688 return UPROPS_SRC_PROPSVEC
;
690 case UCHAR_BIDI_MIRRORING_GLYPH
:
691 return UPROPS_SRC_BIDI
;
693 case UCHAR_CASE_FOLDING
:
694 case UCHAR_LOWERCASE_MAPPING
:
695 case UCHAR_SIMPLE_CASE_FOLDING
:
696 case UCHAR_SIMPLE_LOWERCASE_MAPPING
:
697 case UCHAR_SIMPLE_TITLECASE_MAPPING
:
698 case UCHAR_SIMPLE_UPPERCASE_MAPPING
:
699 case UCHAR_TITLECASE_MAPPING
:
700 case UCHAR_UPPERCASE_MAPPING
:
701 return UPROPS_SRC_CASE
;
703 case UCHAR_ISO_COMMENT
:
705 case UCHAR_UNICODE_1_NAME
:
706 return UPROPS_SRC_NAMES
;
709 return UPROPS_SRC_NONE
;
713 case UCHAR_SCRIPT_EXTENSIONS
:
714 return UPROPS_SRC_PROPSVEC
;
716 return UPROPS_SRC_NONE
; /* undefined */
721 U_CFUNC
void U_EXPORT2
722 uprops_addPropertyStarts(UPropertySource src
, const USetAdder
*sa
, UErrorCode
*pErrorCode
) {
723 if (!ulayout_ensureData(*pErrorCode
)) { return; }
726 case UPROPS_SRC_INPC
:
729 case UPROPS_SRC_INSC
:
736 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
740 if (trie
== nullptr) {
741 *pErrorCode
= U_MISSING_RESOURCE_ERROR
;
745 // Add the start code point of each same-value range of the trie.
746 UChar32 start
= 0, end
;
747 while ((end
= ucptrie_getRange(trie
, start
, UCPMAP_RANGE_NORMAL
, 0,
748 nullptr, nullptr, nullptr)) >= 0) {
749 sa
->add(sa
->set
, start
);
754 #if !UCONFIG_NO_NORMALIZATION
756 U_CAPI
int32_t U_EXPORT2
757 u_getFC_NFKC_Closure(UChar32 c
, UChar
*dest
, int32_t destCapacity
, UErrorCode
*pErrorCode
) {
758 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
761 if(destCapacity
<0 || (dest
==NULL
&& destCapacity
>0)) {
762 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
765 // Compute the FC_NFKC_Closure on the fly:
766 // We have the API for complete coverage of Unicode properties, although
767 // this value by itself is not useful via API.
768 // (What could be useful is a custom normalization table that combines
769 // case folding and NFKC.)
770 // For the derivation, see Unicode's DerivedNormalizationProps.txt.
771 const Normalizer2
*nfkc
=Normalizer2::getNFKCInstance(*pErrorCode
);
772 if(U_FAILURE(*pErrorCode
)) {
775 // first: b = NFKC(Fold(a))
776 UnicodeString folded1String
;
777 const UChar
*folded1
;
778 int32_t folded1Length
=ucase_toFullFolding(c
, &folded1
, U_FOLD_CASE_DEFAULT
);
779 if(folded1Length
<0) {
780 const Normalizer2Impl
*nfkcImpl
=Normalizer2Factory::getImpl(nfkc
);
781 if(nfkcImpl
->getCompQuickCheck(nfkcImpl
->getNorm16(c
))!=UNORM_NO
) {
782 return u_terminateUChars(dest
, destCapacity
, 0, pErrorCode
); // c does not change at all under CaseFolding+NFKC
784 folded1String
.setTo(c
);
786 if(folded1Length
>UCASE_MAX_STRING_LENGTH
) {
787 folded1String
.setTo(folded1Length
);
789 folded1String
.setTo(FALSE
, folded1
, folded1Length
);
792 UnicodeString kc1
=nfkc
->normalize(folded1String
, *pErrorCode
);
793 // second: c = NFKC(Fold(b))
794 UnicodeString
folded2String(kc1
);
795 UnicodeString kc2
=nfkc
->normalize(folded2String
.foldCase(), *pErrorCode
);
796 // if (c != b) add the mapping from a to c
797 if(U_FAILURE(*pErrorCode
) || kc1
==kc2
) {
798 return u_terminateUChars(dest
, destCapacity
, 0, pErrorCode
);
800 return kc2
.extract(dest
, destCapacity
, *pErrorCode
);