2 *******************************************************************************
4 * Copyright (C) 2002-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2002feb24
14 * created by: Markus W. Scherer
16 * Parse more Unicode Character Database files and store
17 * additional Unicode character properties in bit set vectors.
21 #include "unicode/utypes.h"
22 #include "unicode/uchar.h"
23 #include "unicode/uscript.h"
/*
 * LENGTHOF(array): number of elements in a true array, as an int32_t.
 * Only meaningful for actual array objects; a pointer or an array function
 * parameter would make sizeof measure the pointer instead.
 * The whole expansion is parenthesized so the macro always acts as a single
 * operand, e.g. in "sizeof LENGTHOF(a)" or next to other operators.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
34 /* data --------------------------------------------------------------------- */
/*
 * Trie for the additional properties: assigned from utrie_open() in
 * generateAdditionalProperties() and passed to utrie_serialize() in
 * writeAdditionalData().
 */
36 static UNewTrie
*trie
;
/*
 * Result of upvec_toTrie(). writeAdditionalData() treats it as a count of
 * 32-bit words: it copies pvCount*4 bytes and reports
 * pvCount/UPROPS_VECTOR_WORDS as the number of vectors.
 */
38 static int32_t pvCount
;
40 /* miscellaneous ------------------------------------------------------------ */
43 trimTerminateField(char *s
, char *limit
) {
44 /* trim leading whitespace */
45 s
=(char *)u_skipWhitespace(s
);
47 /* trim trailing whitespace */
48 while(s
<limit
&& (*(limit
-1)==' ' || *(limit
-1)=='\t')) {
57 parseTwoFieldFile(char *filename
, char *basename
,
58 const char *ucdFile
, const char *suffix
,
60 UErrorCode
*pErrorCode
) {
63 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
67 writeUCDFilename(basename
, ucdFile
, suffix
);
69 u_parseDelimitedFile(filename
, ';', fields
, 2, lineFn
, NULL
, pErrorCode
);
70 if(U_FAILURE(*pErrorCode
)) {
71 fprintf(stderr
, "error parsing %s.txt: %s\n", ucdFile
, u_errorName(*pErrorCode
));
/*
 * Forward declaration of the per-line callback for DerivedAge.txt.
 * generateAdditionalProperties() hands it to parseTwoFieldFile() together
 * with the "DerivedAge" file name; the definition appears later in the file.
 */
75 static void U_CALLCONV
76 ageLineFn(void *context
,
77 char *fields
[][2], int32_t fieldCount
,
78 UErrorCode
*pErrorCode
);
81 parseMultiFieldFile(char *filename
, char *basename
,
82 const char *ucdFile
, const char *suffix
,
85 UErrorCode
*pErrorCode
) {
88 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
92 writeUCDFilename(basename
, ucdFile
, suffix
);
94 u_parseDelimitedFile(filename
, ';', fields
, fieldCount
, lineFn
, NULL
, pErrorCode
);
95 if(U_FAILURE(*pErrorCode
)) {
96 fprintf(stderr
, "error parsing %s.txt: %s\n", ucdFile
, u_errorName(*pErrorCode
));
/*
 * Forward declaration of the per-line callback for DerivedNumericValues.txt.
 * generateAdditionalProperties() hands it to parseMultiFieldFile() with a
 * field count of 2; the definition appears later in the file.
 */
100 static void U_CALLCONV
101 numericLineFn(void *context
,
102 char *fields
[][2], int32_t fieldCount
,
103 UErrorCode
*pErrorCode
);
/*
 * Forward declaration of the per-line callback for DerivedBidiClass.txt.
 * generateAdditionalProperties() hands it to parseTwoFieldFile() together
 * with the "DerivedBidiClass" file name; the definition appears later in
 * the file.
 */
105 static void U_CALLCONV
106 bidiClassLineFn(void *context
,
107 char *fields
[][2], int32_t fieldCount
,
108 UErrorCode
*pErrorCode
);
110 /* parse files with single enumerated properties ---------------------------- */
113 const char *ucdFile
, *propName
;
115 int32_t vecWord
, vecShift
;
118 typedef struct SingleEnum SingleEnum
;
121 parseSingleEnumFile(char *filename
, char *basename
, const char *suffix
,
122 const SingleEnum
*sen
,
123 UErrorCode
*pErrorCode
);
125 static const SingleEnum scriptSingleEnum
={
128 0, 0, UPROPS_SCRIPT_MASK
131 static const SingleEnum blockSingleEnum
={
134 0, UPROPS_BLOCK_SHIFT
, UPROPS_BLOCK_MASK
137 static const SingleEnum lineBreakSingleEnum
={
138 "LineBreak", "line break",
140 0, UPROPS_LB_SHIFT
, UPROPS_LB_MASK
143 static const SingleEnum eawSingleEnum
={
144 "EastAsianWidth", "east asian width",
145 UCHAR_EAST_ASIAN_WIDTH
,
146 0, UPROPS_EA_SHIFT
, UPROPS_EA_MASK
149 static const SingleEnum jtSingleEnum
={
150 "DerivedJoiningType", "joining type",
152 2, UPROPS_JT_SHIFT
, UPROPS_JT_MASK
155 static const SingleEnum jgSingleEnum
={
156 "DerivedJoiningGroup", "joining group",
158 2, UPROPS_JG_SHIFT
, UPROPS_JG_MASK
161 static void U_CALLCONV
162 singleEnumLineFn(void *context
,
163 char *fields
[][2], int32_t fieldCount
,
164 UErrorCode
*pErrorCode
) {
165 const SingleEnum
*sen
;
167 uint32_t start
, limit
, uv
;
170 sen
=(const SingleEnum
*)context
;
172 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
173 if(U_FAILURE(*pErrorCode
)) {
174 fprintf(stderr
, "genprops: syntax error in %s.txt field 0 at %s\n", sen
->ucdFile
, fields
[0][0]);
179 /* parse property alias */
180 s
=trimTerminateField(fields
[1][0], fields
[1][1]);
181 value
=u_getPropertyValueEnum(sen
->prop
, s
);
183 if(sen
->prop
==UCHAR_BLOCK
) {
184 if(isToken("Greek", s
)) {
185 value
=UBLOCK_GREEK
; /* Unicode 3.2 renames this to "Greek and Coptic" */
186 } else if(isToken("Combining Marks for Symbols", s
)) {
187 value
=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS
; /* Unicode 3.2 renames this to "Combining Diacritical Marks for Symbols" */
188 } else if(isToken("Private Use", s
)) {
189 value
=UBLOCK_PRIVATE_USE
; /* Unicode 3.2 renames this to "Private Use Area" */
194 fprintf(stderr
, "genprops error: unknown %s name in %s.txt field 1 at %s\n",
195 sen
->propName
, sen
->ucdFile
, s
);
199 uv
=(uint32_t)(value
<<sen
->vecShift
);
200 if((uv
&sen
->vecMask
)!=uv
) {
201 fprintf(stderr
, "genprops error: %s value overflow (0x%x) at %s\n",
202 sen
->propName
, (int)uv
, s
);
203 exit(U_INTERNAL_PROGRAM_ERROR
);
206 if(!upvec_setValue(pv
, start
, limit
, sen
->vecWord
, uv
, sen
->vecMask
, pErrorCode
)) {
207 fprintf(stderr
, "genprops error: unable to set %s code: %s\n",
208 sen
->propName
, u_errorName(*pErrorCode
));
214 parseSingleEnumFile(char *filename
, char *basename
, const char *suffix
,
215 const SingleEnum
*sen
,
216 UErrorCode
*pErrorCode
) {
219 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
223 writeUCDFilename(basename
, sen
->ucdFile
, suffix
);
225 u_parseDelimitedFile(filename
, ';', fields
, 2, singleEnumLineFn
, (void *)sen
, pErrorCode
);
226 if(U_FAILURE(*pErrorCode
)) {
227 fprintf(stderr
, "error parsing %s.txt: %s\n", sen
->ucdFile
, u_errorName(*pErrorCode
));
231 /* parse files with multiple binary properties ------------------------------ */
234 const char *propName
;
235 int32_t vecWord
, vecShift
;
237 typedef struct Binary Binary
;
241 const Binary
*binaries
;
242 int32_t binariesCount
;
244 typedef struct Binaries Binaries
;
248 { "White_Space", 1, UPROPS_WHITE_SPACE
},
249 { "Bidi_Control", 1, UPROPS_BIDI_CONTROL
},
250 { "Join_Control", 1, UPROPS_JOIN_CONTROL
},
251 { "Dash", 1, UPROPS_DASH
},
252 { "Hyphen", 1, UPROPS_HYPHEN
},
253 { "Quotation_Mark", 1, UPROPS_QUOTATION_MARK
},
254 { "Terminal_Punctuation", 1, UPROPS_TERMINAL_PUNCTUATION
},
255 { "Hex_Digit", 1, UPROPS_HEX_DIGIT
},
256 { "ASCII_Hex_Digit", 1, UPROPS_ASCII_HEX_DIGIT
},
257 { "Ideographic", 1, UPROPS_IDEOGRAPHIC
},
258 { "Diacritic", 1, UPROPS_DIACRITIC
},
259 { "Extender", 1, UPROPS_EXTENDER
},
260 { "Noncharacter_Code_Point", 1, UPROPS_NONCHARACTER_CODE_POINT
},
261 { "Grapheme_Link", 1, UPROPS_GRAPHEME_LINK
},
262 { "IDS_Binary_Operator", 1, UPROPS_IDS_BINARY_OPERATOR
},
263 { "IDS_Trinary_Operator", 1, UPROPS_IDS_TRINARY_OPERATOR
},
264 { "Radical", 1, UPROPS_RADICAL
},
265 { "Unified_Ideograph", 1, UPROPS_UNIFIED_IDEOGRAPH
},
266 { "Deprecated", 1, UPROPS_DEPRECATED
},
267 { "Soft_Dotted", 1, UPROPS_SOFT_DOTTED
},
268 { "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION
},
270 /* new properties in Unicode 4.0.1 */
271 { "STerm", 2, UPROPS_V2_S_TERM
},
272 { "Variation_Selector", 2, UPROPS_V2_VARIATION_SELECTOR
}
275 static const Binaries
277 "PropList", propListNames
, LENGTHOF(propListNames
)
281 derCorePropsNames
[]={
282 { "XID_Start", 1, UPROPS_XID_START
},
283 { "XID_Continue", 1, UPROPS_XID_CONTINUE
},
285 /* before Unicode 4/ICU 2.6/format version 3.2, these used to be Other_XYZ from PropList.txt */
286 { "Math", 1, UPROPS_MATH
},
287 { "Alphabetic", 1, UPROPS_ALPHABETIC
},
288 { "Lowercase", 1, UPROPS_LOWERCASE
},
289 { "Uppercase", 1, UPROPS_UPPERCASE
},
290 { "Grapheme_Extend", 1, UPROPS_GRAPHEME_EXTEND
},
291 { "Default_Ignorable_Code_Point", 1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT
},
293 /* new property bits in ICU 2.6/format version 3.2 */
294 { "ID_Start", 1, UPROPS_ID_START
},
295 { "ID_Continue", 1, UPROPS_ID_CONTINUE
},
296 { "Grapheme_Base", 1, UPROPS_GRAPHEME_BASE
}
299 static const Binaries
300 derCorePropsBinaries
={
301 "DerivedCoreProperties", derCorePropsNames
, LENGTHOF(derCorePropsNames
)
/*
 * Names of properties that were not recognized while parsing a binaries
 * file. addIgnoredProp() appends a name if it is not already present, and
 * parseBinariesFile() prints each one as "ignoring property ...".
 * Capacity: 100 names of at most 63 characters plus the terminator.
 * NOTE(review): the visible part of addIgnoredProp() copies with
 * uprv_strcpy() into ignoredProps[ignoredPropsCount++] and shows no check
 * against either dimension -- confirm whether a bounds check exists.
 */
304 static char ignoredProps
[100][64];
/* Number of entries currently stored in ignoredProps. */
305 static int32_t ignoredPropsCount
;
308 addIgnoredProp(char *s
, char *limit
) {
311 s
=trimTerminateField(s
, limit
);
312 for(i
=0; i
<ignoredPropsCount
; ++i
) {
313 if(0==uprv_strcmp(ignoredProps
[i
], s
)) {
317 uprv_strcpy(ignoredProps
[ignoredPropsCount
++], s
);
320 static void U_CALLCONV
321 binariesLineFn(void *context
,
322 char *fields
[][2], int32_t fieldCount
,
323 UErrorCode
*pErrorCode
) {
326 uint32_t start
, limit
, uv
;
329 bin
=(const Binaries
*)context
;
331 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
332 if(U_FAILURE(*pErrorCode
)) {
333 fprintf(stderr
, "genprops: syntax error in %s.txt field 0 at %s\n", bin
->ucdFile
, fields
[0][0]);
338 /* parse binary property name */
339 s
=(char *)u_skipWhitespace(fields
[1][0]);
341 if(i
==bin
->binariesCount
) {
342 /* ignore unrecognized properties */
343 addIgnoredProp(s
, fields
[1][1]);
346 if(isToken(bin
->binaries
[i
].propName
, s
)) {
351 if(bin
->binaries
[i
].vecShift
>=32) {
352 fprintf(stderr
, "genprops error: shift value %d>=32 for %s %s\n",
353 (int)bin
->binaries
[i
].vecShift
, bin
->ucdFile
, bin
->binaries
[i
].propName
);
354 exit(U_INTERNAL_PROGRAM_ERROR
);
356 uv
=U_MASK(bin
->binaries
[i
].vecShift
);
358 if(!upvec_setValue(pv
, start
, limit
, bin
->binaries
[i
].vecWord
, uv
, uv
, pErrorCode
)) {
359 fprintf(stderr
, "genprops error: unable to set %s code: %s\n",
360 bin
->binaries
[i
].propName
, u_errorName(*pErrorCode
));
366 parseBinariesFile(char *filename
, char *basename
, const char *suffix
,
368 UErrorCode
*pErrorCode
) {
372 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
376 writeUCDFilename(basename
, bin
->ucdFile
, suffix
);
380 u_parseDelimitedFile(filename
, ';', fields
, 2, binariesLineFn
, (void *)bin
, pErrorCode
);
381 if(U_FAILURE(*pErrorCode
)) {
382 fprintf(stderr
, "error parsing %s.txt: %s\n", bin
->ucdFile
, u_errorName(*pErrorCode
));
385 for(i
=0; i
<ignoredPropsCount
; ++i
) {
386 printf("genprops: ignoring property %s in %s.txt\n", ignoredProps
[i
], bin
->ucdFile
);
390 /* -------------------------------------------------------------------------- */
393 initAdditionalProperties() {
394 pv
=upvec_open(UPROPS_VECTOR_WORDS
, 20000);
398 generateAdditionalProperties(char *filename
, const char *suffix
, UErrorCode
*pErrorCode
) {
401 basename
=filename
+uprv_strlen(filename
);
403 /* process various UCD .txt files */
405 /* add Han numeric types & values */
406 parseMultiFieldFile(filename
, basename
, "DerivedNumericValues", suffix
, 2, numericLineFn
, pErrorCode
);
408 /* set proper bidi class for unassigned code points (Cn) */
409 parseTwoFieldFile(filename
, basename
, "DerivedBidiClass", suffix
, bidiClassLineFn
, pErrorCode
);
411 parseTwoFieldFile(filename
, basename
, "DerivedAge", suffix
, ageLineFn
, pErrorCode
);
416 * "Common - For characters that may be used
417 * within multiple scripts,
418 * or any unassigned code points."
421 * "The value COMMON is the default value,
422 * given to all code points that are not
423 * explicitly mentioned in the data file."
425 * COMMON==USCRIPT_COMMON==0 - nothing to do
427 parseSingleEnumFile(filename
, basename
, suffix
, &scriptSingleEnum
, pErrorCode
);
429 parseSingleEnumFile(filename
, basename
, suffix
, &blockSingleEnum
, pErrorCode
);
431 parseBinariesFile(filename
, basename
, suffix
, &propListBinaries
, pErrorCode
);
433 parseBinariesFile(filename
, basename
, suffix
, &derCorePropsBinaries
, pErrorCode
);
436 * LineBreak-4.0.0.txt:
437 * - All code points, assigned and unassigned, that are not listed
438 * explicitly are given the value "XX".
440 * XX==U_LB_UNKNOWN==0 - nothing to do
442 parseSingleEnumFile(filename
, basename
, suffix
, &lineBreakSingleEnum
, pErrorCode
);
444 parseSingleEnumFile(filename
, basename
, suffix
, &jtSingleEnum
, pErrorCode
);
446 parseSingleEnumFile(filename
, basename
, suffix
, &jgSingleEnum
, pErrorCode
);
449 * Preset East Asian Width defaults:
451 * http://www.unicode.org/reports/tr11/#Unassigned
452 * 7.1 Unassigned and Private Use characters
454 * All unassigned characters are by default classified as non-East Asian neutral,
455 * except for the range U+20000 to U+2FFFD,
456 * since all code positions from U+20000 to U+2FFFD are intended for CJK ideographs (W).
457 * All Private use characters are by default classified as ambiguous,
458 * since their definition depends on context.
460 * N for all ==0 - nothing to do
464 *pErrorCode
=U_ZERO_ERROR
;
465 if( !upvec_setValue(pv
, 0xe000, 0xf900, 0, (uint32_t)(U_EA_AMBIGUOUS
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
) ||
466 !upvec_setValue(pv
, 0xf0000, 0xffffe, 0, (uint32_t)(U_EA_AMBIGUOUS
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
) ||
467 !upvec_setValue(pv
, 0x100000, 0x10fffe, 0, (uint32_t)(U_EA_AMBIGUOUS
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
) ||
468 !upvec_setValue(pv
, 0x20000, 0x2fffe, 0, (uint32_t)(U_EA_WIDE
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
)
470 fprintf(stderr
, "genprops: unable to set default East Asian Widths: %s\n", u_errorName(*pErrorCode
));
474 /* parse EastAsianWidth.txt */
475 parseSingleEnumFile(filename
, basename
, suffix
, &eawSingleEnum
, pErrorCode
);
477 trie
=utrie_open(NULL
, NULL
, 50000, 0, 0, TRUE
);
479 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
484 pvCount
=upvec_toTrie(pv
, trie
, pErrorCode
);
485 if(U_FAILURE(*pErrorCode
)) {
486 fprintf(stderr
, "genprops error: unable to build trie for additional properties: %s\n", u_errorName(*pErrorCode
));
491 /* DerivedAge.txt ----------------------------------------------------------- */
493 static void U_CALLCONV
494 ageLineFn(void *context
,
495 char *fields
[][2], int32_t fieldCount
,
496 UErrorCode
*pErrorCode
) {
498 uint32_t value
, start
, limit
, version
;
500 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
501 if(U_FAILURE(*pErrorCode
)) {
502 fprintf(stderr
, "genprops: syntax error in DerivedAge.txt field 0 at %s\n", fields
[0][0]);
507 /* parse version number */
508 s
=(char *)u_skipWhitespace(fields
[1][0]);
509 value
=(uint32_t)uprv_strtoul(s
, &end
, 10);
510 if(s
==end
|| value
==0 || value
>15 || (*end
!='.' && *end
!=' ' && *end
!='\t' && *end
!=0)) {
511 fprintf(stderr
, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields
[1][0]);
512 *pErrorCode
=U_PARSE_ERROR
;
517 /* parse minor version number */
519 s
=(char *)u_skipWhitespace(end
+1);
520 value
=(uint32_t)uprv_strtoul(s
, &end
, 10);
521 if(s
==end
|| value
>15 || (*end
!=' ' && *end
!='\t' && *end
!=0)) {
522 fprintf(stderr
, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields
[1][0]);
523 *pErrorCode
=U_PARSE_ERROR
;
529 if(!upvec_setValue(pv
, start
, limit
, 0, version
<<UPROPS_AGE_SHIFT
, UPROPS_AGE_MASK
, pErrorCode
)) {
530 fprintf(stderr
, "genprops error: unable to set character age: %s\n", u_errorName(*pErrorCode
));
535 /* DerivedNumericValues.txt ------------------------------------------------- */
537 static void U_CALLCONV
538 numericLineFn(void *context
,
539 char *fields
[][2], int32_t fieldCount
,
540 UErrorCode
*pErrorCode
) {
543 uint32_t start
, limit
, value
, oldProps32
;
548 /* get the code point range */
549 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
550 if(U_FAILURE(*pErrorCode
)) {
551 fprintf(stderr
, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields
[0][0]);
556 /* check if the numeric value is a fraction (this code does not handle any) */
558 s
=uprv_strchr(fields
[1][0], '.');
561 while('0'<=(c
=*end
++) && c
<='9') {
572 /* parse numeric value */
573 s
=(char *)u_skipWhitespace(fields
[1][0]);
575 /* try large powers of 10 first, may otherwise overflow strtoul() */
576 if(0==uprv_strncmp(s
, "10000000000", 11)) {
577 /* large powers of 10 are encoded in a special way, see store.c */
580 while(*(++end
)=='0') {
584 /* normal number parsing */
585 value
=(uint32_t)uprv_strtoul(s
, &end
, 10);
587 if(end
<=s
|| (*end
!='.' && u_skipWhitespace(end
)!=fields
[1][1]) || value
>=0x80000000) {
588 fprintf(stderr
, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields
[0][0]);
594 * Unicode 4.0.1 removes the third column that used to list the numeric type.
595 * Assume that either the data is the same as in UnicodeData.txt,
596 * or else that the numeric type is "numeric".
597 * This should work because we only expect to add numeric values for
598 * Han characters; for those, UnicodeData.txt lists only ranges without
599 * specific properties for single characters.
602 for(; start
<limit
; ++start
) {
603 oldProps32
=getProps(start
);
604 oldType
=(int32_t)GET_NUMERIC_TYPE(oldProps32
);
606 /* this code point was already listed with its numeric value in UnicodeData.txt */
611 * Do not set a numeric value for code points that have other
612 * values or exceptions because the code below is not prepared
613 * to maintain such values and exceptions.
615 * Check store.c (e.g., file format description and makeProps())
616 * for details of what code points get their value field interpreted.
617 * For example, case mappings for Ll/Lt/Lu and mirror mappings for mirrored characters.
619 * For simplicity, and because we only expect to set numeric values for Han characters,
620 * for now we only allow setting these values for Lo characters.
622 if(GET_UNSIGNED_VALUE(oldProps32
)!=0 || PROPS_VALUE_IS_EXCEPTION(oldProps32
) || GET_CATEGORY(oldProps32
)!=U_OTHER_LETTER
) {
623 fprintf(stderr
, "genprops error: new numeric value for a character with some other value in DerivedNumericValues.txt at %s\n", fields
[0][0]);
628 fprintf(stderr
, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n", fields
[1][0]);
633 printf("adding U+%04x numeric type %d value %u\n", (int)start
, U_NT_NUMERIC
, (int)value
);
636 /* reconstruct the properties and set the new numeric type and value */
637 uprv_memset(&newProps
, 0, sizeof(newProps
));
639 newProps
.generalCategory
=(uint8_t)GET_CATEGORY(oldProps32
);
640 newProps
.bidi
=(uint8_t)GET_BIDI_CLASS(oldProps32
);
641 newProps
.isMirrored
=(uint8_t)(oldProps32
&(1UL<<UPROPS_MIRROR_SHIFT
) ? TRUE
: FALSE
);
642 newProps
.numericType
=(uint8_t)U_NT_NUMERIC
; /* assumed numeric type, see Unicode 4.0.1 comment */
643 newProps
.numericValue
=(int32_t)value
; /* newly parsed numeric value */
644 addProps(start
, makeProps(&newProps
));
648 /* DerivedBidiClass.txt ----------------------------------------------------- */
650 static void U_CALLCONV
651 bidiClassLineFn(void *context
,
652 char *fields
[][2], int32_t fieldCount
,
653 UErrorCode
*pErrorCode
) {
655 uint32_t oldStart
, start
, limit
, value
, props32
;
658 /* get the code point range */
659 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
660 if(U_FAILURE(*pErrorCode
)) {
661 fprintf(stderr
, "genprops: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields
[0][0]);
666 /* parse bidi class */
667 s
=trimTerminateField(fields
[1][0], fields
[1][1]);
668 value
=u_getPropertyValueEnum(UCHAR_BIDI_CLASS
, s
);
669 if((int32_t)value
<0) {
670 fprintf(stderr
, "genprops error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", s
);
676 for(; start
<limit
; ++start
) {
677 props32
=getProps(start
);
679 /* ignore if this bidi class is already set */
680 if(value
==GET_BIDI_CLASS(props32
)) {
684 /* ignore old bidi class, set only for unassigned code points (Cn) */
685 if(GET_CATEGORY(props32
)!=0) {
686 /* error if this one contradicts what we parsed from UnicodeData.txt */
687 fprintf(stderr
, "genprops error: different bidi class in DerivedBidiClass.txt field 1 at %s\n", s
);
691 /* remove whatever bidi class was set before */
692 props32
&=~(0x1f<<UPROPS_BIDI_SHIFT
);
694 /* set bidi class for Cn according to DerivedBidiClass.txt */
695 props32
|=value
<<UPROPS_BIDI_SHIFT
;
697 /* set the modified properties */
698 addProps(start
, props32
);
702 if(didSet
&& beVerbose
) {
703 printf("setting U+%04x..U+%04x bidi class %d\n", (int)oldStart
, (int)limit
-1, (int)value
);
707 /* data serialization ------------------------------------------------------- */
710 writeAdditionalData(uint8_t *p
, int32_t capacity
, int32_t indexes
[UPROPS_INDEX_COUNT
]) {
712 UErrorCode errorCode
;
714 errorCode
=U_ZERO_ERROR
;
715 length
=utrie_serialize(trie
, p
, capacity
, getFoldedPropsValue
, TRUE
, &errorCode
);
716 if(U_FAILURE(errorCode
)) {
717 fprintf(stderr
, "genprops error: unable to serialize trie for additional properties: %s\n", u_errorName(errorCode
));
724 printf("size in bytes of additional props trie:%5u\n", (int)length
);
728 indexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
]=
729 indexes
[UPROPS_ADDITIONAL_TRIE_INDEX
]+length
/4;
730 indexes
[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX
]=UPROPS_VECTOR_WORDS
;
731 indexes
[UPROPS_RESERVED_INDEX
]=
732 indexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
]+pvCount
;
734 indexes
[UPROPS_MAX_VALUES_INDEX
]=
735 (((int32_t)U_LB_COUNT
-1)<<UPROPS_LB_SHIFT
)|
736 (((int32_t)U_EA_COUNT
-1)<<UPROPS_EA_SHIFT
)|
737 (((int32_t)UBLOCK_COUNT
-1)<<UPROPS_BLOCK_SHIFT
)|
738 ((int32_t)USCRIPT_CODE_LIMIT
-1);
739 indexes
[UPROPS_MAX_VALUES_2_INDEX
]=
740 (((int32_t)U_JT_COUNT
-1)<<UPROPS_JT_SHIFT
)|
741 (((int32_t)U_JG_COUNT
-1)<<UPROPS_JG_SHIFT
)|
742 ((int32_t)U_DT_COUNT
-1);
745 if(p
!=NULL
&& (pvCount
*4)<=capacity
) {
746 uprv_memcpy(p
, pv
, pvCount
*4);
748 printf("number of additional props vectors: %5u\n", (int)pvCount
/UPROPS_VECTOR_WORDS
);
749 printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS
);