2 *******************************************************************************
4 * Copyright (C) 2002-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2002feb24
14 * created by: Markus W. Scherer
16 * Parse more Unicode Character Database files and store
17 * additional Unicode character properties in bit set vectors.
21 #include "unicode/utypes.h"
22 #include "unicode/uchar.h"
23 #include "unicode/uscript.h"
/*
 * Number of elements in a real array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
35 /* data --------------------------------------------------------------------- */
/*
 * Build-time trie for the additional properties: assigned from utrie_open()
 * in generateAdditionalProperties() and serialized in writeAdditionalData().
 */
37 static UNewTrie
*trie
;
/*
 * Result of upvec_compact() in generateAdditionalProperties(); used in
 * writeAdditionalData() for the index arithmetic and the vector copy.
 */
39 static int32_t pvCount
;
41 /* miscellaneous ------------------------------------------------------------ */
44 trimTerminateField(char *s
, char *limit
) {
45 /* trim leading whitespace */
46 s
=(char *)u_skipWhitespace(s
);
48 /* trim trailing whitespace */
49 while(s
<limit
&& (*(limit
-1)==' ' || *(limit
-1)=='\t')) {
58 parseTwoFieldFile(char *filename
, char *basename
,
59 const char *ucdFile
, const char *suffix
,
61 UErrorCode
*pErrorCode
) {
64 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
68 writeUCDFilename(basename
, ucdFile
, suffix
);
70 u_parseDelimitedFile(filename
, ';', fields
, 2, lineFn
, NULL
, pErrorCode
);
71 if(U_FAILURE(*pErrorCode
)) {
72 fprintf(stderr
, "error parsing %s.txt: %s\n", ucdFile
, u_errorName(*pErrorCode
));
/*
 * Forward declaration: generateAdditionalProperties() hands ageLineFn to
 * parseTwoFieldFile() for "DerivedAge" before the definition appears
 * further down in this file.
 */
76 static void U_CALLCONV
77 ageLineFn(void *context
,
78 char *fields
[][2], int32_t fieldCount
,
79 UErrorCode
*pErrorCode
);
82 parseMultiFieldFile(char *filename
, char *basename
,
83 const char *ucdFile
, const char *suffix
,
86 UErrorCode
*pErrorCode
) {
89 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
93 writeUCDFilename(basename
, ucdFile
, suffix
);
95 u_parseDelimitedFile(filename
, ';', fields
, fieldCount
, lineFn
, NULL
, pErrorCode
);
96 if(U_FAILURE(*pErrorCode
)) {
97 fprintf(stderr
, "error parsing %s.txt: %s\n", ucdFile
, u_errorName(*pErrorCode
));
/*
 * Forward declaration: generateAdditionalProperties() hands numericLineFn to
 * parseMultiFieldFile() for "DerivedNumericValues" before the definition
 * appears further down in this file.
 */
101 static void U_CALLCONV
102 numericLineFn(void *context
,
103 char *fields
[][2], int32_t fieldCount
,
104 UErrorCode
*pErrorCode
);
106 /* parse files with single enumerated properties ---------------------------- */
109 const char *ucdFile
, *propName
;
111 int32_t vecWord
, vecShift
;
/* Lets the rest of the file write "SingleEnum" instead of "struct SingleEnum". */
114 typedef struct SingleEnum SingleEnum
;
117 parseSingleEnumFile(char *filename
, char *basename
, const char *suffix
,
118 const SingleEnum
*sen
,
119 UErrorCode
*pErrorCode
);
121 static const SingleEnum scriptSingleEnum
={
124 0, 0, UPROPS_SCRIPT_MASK
127 static const SingleEnum blockSingleEnum
={
130 0, UPROPS_BLOCK_SHIFT
, UPROPS_BLOCK_MASK
133 static const SingleEnum graphemeClusterBreakSingleEnum
={
134 "GraphemeBreakProperty", "Grapheme_Cluster_Break",
135 UCHAR_GRAPHEME_CLUSTER_BREAK
,
136 2, UPROPS_GCB_SHIFT
, UPROPS_GCB_MASK
139 static const SingleEnum wordBreakSingleEnum
={
140 "WordBreakProperty", "Word_Break",
142 2, UPROPS_WB_SHIFT
, UPROPS_WB_MASK
145 static const SingleEnum sentenceBreakSingleEnum
={
146 "SentenceBreakProperty", "Sentence_Break",
147 UCHAR_SENTENCE_BREAK
,
148 2, UPROPS_SB_SHIFT
, UPROPS_SB_MASK
151 static const SingleEnum lineBreakSingleEnum
={
152 "LineBreak", "line break",
154 0, UPROPS_LB_SHIFT
, UPROPS_LB_MASK
157 static const SingleEnum eawSingleEnum
={
158 "EastAsianWidth", "east asian width",
159 UCHAR_EAST_ASIAN_WIDTH
,
160 0, UPROPS_EA_SHIFT
, UPROPS_EA_MASK
163 static void U_CALLCONV
164 singleEnumLineFn(void *context
,
165 char *fields
[][2], int32_t fieldCount
,
166 UErrorCode
*pErrorCode
) {
167 const SingleEnum
*sen
;
169 uint32_t start
, limit
, uv
;
172 sen
=(const SingleEnum
*)context
;
174 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
175 if(U_FAILURE(*pErrorCode
)) {
176 fprintf(stderr
, "genprops: syntax error in %s.txt field 0 at %s\n", sen
->ucdFile
, fields
[0][0]);
181 /* parse property alias */
182 s
=trimTerminateField(fields
[1][0], fields
[1][1]);
183 value
=u_getPropertyValueEnum(sen
->prop
, s
);
185 if(sen
->prop
==UCHAR_BLOCK
) {
186 if(isToken("Greek", s
)) {
187 value
=UBLOCK_GREEK
; /* Unicode 3.2 renames this to "Greek and Coptic" */
188 } else if(isToken("Combining Marks for Symbols", s
)) {
189 value
=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS
; /* Unicode 3.2 renames this to "Combining Diacritical Marks for Symbols" */
190 } else if(isToken("Private Use", s
)) {
191 value
=UBLOCK_PRIVATE_USE
; /* Unicode 3.2 renames this to "Private Use Area" */
196 fprintf(stderr
, "genprops error: unknown %s name in %s.txt field 1 at %s\n",
197 sen
->propName
, sen
->ucdFile
, s
);
201 uv
=(uint32_t)(value
<<sen
->vecShift
);
202 if((uv
&sen
->vecMask
)!=uv
) {
203 fprintf(stderr
, "genprops error: %s value overflow (0x%x) at %s\n",
204 sen
->propName
, (int)uv
, s
);
205 exit(U_INTERNAL_PROGRAM_ERROR
);
208 if(!upvec_setValue(pv
, start
, limit
, sen
->vecWord
, uv
, sen
->vecMask
, pErrorCode
)) {
209 fprintf(stderr
, "genprops error: unable to set %s code: %s\n",
210 sen
->propName
, u_errorName(*pErrorCode
));
216 parseSingleEnumFile(char *filename
, char *basename
, const char *suffix
,
217 const SingleEnum
*sen
,
218 UErrorCode
*pErrorCode
) {
221 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
225 writeUCDFilename(basename
, sen
->ucdFile
, suffix
);
227 u_parseDelimitedFile(filename
, ';', fields
, 2, singleEnumLineFn
, (void *)sen
, pErrorCode
);
228 if(U_FAILURE(*pErrorCode
)) {
229 fprintf(stderr
, "error parsing %s.txt: %s\n", sen
->ucdFile
, u_errorName(*pErrorCode
));
233 /* parse files with multiple binary properties ------------------------------ */
236 const char *propName
;
237 int32_t vecWord
, vecShift
;
/* Lets the rest of the file write "Binary" instead of "struct Binary". */
239 typedef struct Binary Binary
;
243 const Binary
*binaries
;
244 int32_t binariesCount
;
/* Lets the rest of the file write "Binaries" instead of "struct Binaries". */
246 typedef struct Binaries Binaries
;
250 { "White_Space", 1, UPROPS_WHITE_SPACE
},
251 { "Dash", 1, UPROPS_DASH
},
252 { "Hyphen", 1, UPROPS_HYPHEN
},
253 { "Quotation_Mark", 1, UPROPS_QUOTATION_MARK
},
254 { "Terminal_Punctuation", 1, UPROPS_TERMINAL_PUNCTUATION
},
255 { "Hex_Digit", 1, UPROPS_HEX_DIGIT
},
256 { "ASCII_Hex_Digit", 1, UPROPS_ASCII_HEX_DIGIT
},
257 { "Ideographic", 1, UPROPS_IDEOGRAPHIC
},
258 { "Diacritic", 1, UPROPS_DIACRITIC
},
259 { "Extender", 1, UPROPS_EXTENDER
},
260 { "Noncharacter_Code_Point", 1, UPROPS_NONCHARACTER_CODE_POINT
},
261 { "Grapheme_Link", 1, UPROPS_GRAPHEME_LINK
},
262 { "IDS_Binary_Operator", 1, UPROPS_IDS_BINARY_OPERATOR
},
263 { "IDS_Trinary_Operator", 1, UPROPS_IDS_TRINARY_OPERATOR
},
264 { "Radical", 1, UPROPS_RADICAL
},
265 { "Unified_Ideograph", 1, UPROPS_UNIFIED_IDEOGRAPH
},
266 { "Deprecated", 1, UPROPS_DEPRECATED
},
267 { "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION
},
269 /* new properties in Unicode 4.0.1 */
270 { "STerm", 2, UPROPS_V2_S_TERM
},
271 { "Variation_Selector", 2, UPROPS_V2_VARIATION_SELECTOR
},
273 /* new properties in Unicode 4.1 */
274 { "Pattern_Syntax", 2, UPROPS_V2_PATTERN_SYNTAX
},
275 { "Pattern_White_Space", 2, UPROPS_V2_PATTERN_WHITE_SPACE
}
278 static const Binaries
280 "PropList", propListNames
, LENGTHOF(propListNames
)
284 derCorePropsNames
[]={
285 { "XID_Start", 1, UPROPS_XID_START
},
286 { "XID_Continue", 1, UPROPS_XID_CONTINUE
},
288 /* before Unicode 4/ICU 2.6/format version 3.2, these used to be Other_XYZ from PropList.txt */
289 { "Math", 1, UPROPS_MATH
},
290 { "Alphabetic", 1, UPROPS_ALPHABETIC
},
291 { "Grapheme_Extend", 1, UPROPS_GRAPHEME_EXTEND
},
292 { "Default_Ignorable_Code_Point", 1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT
},
294 /* new properties bits in ICU 2.6/format version 3.2 */
295 { "ID_Start", 1, UPROPS_ID_START
},
296 { "ID_Continue", 1, UPROPS_ID_CONTINUE
},
297 { "Grapheme_Base", 1, UPROPS_GRAPHEME_BASE
},
300 * Unicode 5/ICU 3.6 moves Grapheme_Link from PropList.txt
301 * to DerivedCoreProperties.txt and deprecates it.
303 { "Grapheme_Link", 1, UPROPS_GRAPHEME_LINK
}
306 static const Binaries
307 derCorePropsBinaries
={
308 "DerivedCoreProperties", derCorePropsNames
, LENGTHOF(derCorePropsNames
)
/*
 * Names of unrecognized properties passed to addIgnoredProp(), which compares
 * each new name against the stored ones before appending. parseBinariesFile()
 * prints the stored names. Fixed capacity: 100 rows of 64 chars each.
 */
311 static char ignoredProps
[100][64];
/* Number of rows of ignoredProps currently in use. */
312 static int32_t ignoredPropsCount
;
315 addIgnoredProp(char *s
, char *limit
) {
318 s
=trimTerminateField(s
, limit
);
319 for(i
=0; i
<ignoredPropsCount
; ++i
) {
320 if(0==uprv_strcmp(ignoredProps
[i
], s
)) {
324 uprv_strcpy(ignoredProps
[ignoredPropsCount
++], s
);
327 static void U_CALLCONV
328 binariesLineFn(void *context
,
329 char *fields
[][2], int32_t fieldCount
,
330 UErrorCode
*pErrorCode
) {
333 uint32_t start
, limit
, uv
;
336 bin
=(const Binaries
*)context
;
338 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
339 if(U_FAILURE(*pErrorCode
)) {
340 fprintf(stderr
, "genprops: syntax error in %s.txt field 0 at %s\n", bin
->ucdFile
, fields
[0][0]);
345 /* parse binary property name */
346 s
=(char *)u_skipWhitespace(fields
[1][0]);
348 if(i
==bin
->binariesCount
) {
349 /* ignore unrecognized properties */
351 addIgnoredProp(s
, fields
[1][1]);
355 if(isToken(bin
->binaries
[i
].propName
, s
)) {
360 if(bin
->binaries
[i
].vecShift
>=32) {
361 fprintf(stderr
, "genprops error: shift value %d>=32 for %s %s\n",
362 (int)bin
->binaries
[i
].vecShift
, bin
->ucdFile
, bin
->binaries
[i
].propName
);
363 exit(U_INTERNAL_PROGRAM_ERROR
);
365 uv
=U_MASK(bin
->binaries
[i
].vecShift
);
367 if(!upvec_setValue(pv
, start
, limit
, bin
->binaries
[i
].vecWord
, uv
, uv
, pErrorCode
)) {
368 fprintf(stderr
, "genprops error: unable to set %s code: %s\n",
369 bin
->binaries
[i
].propName
, u_errorName(*pErrorCode
));
375 parseBinariesFile(char *filename
, char *basename
, const char *suffix
,
377 UErrorCode
*pErrorCode
) {
381 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
385 writeUCDFilename(basename
, bin
->ucdFile
, suffix
);
389 u_parseDelimitedFile(filename
, ';', fields
, 2, binariesLineFn
, (void *)bin
, pErrorCode
);
390 if(U_FAILURE(*pErrorCode
)) {
391 fprintf(stderr
, "error parsing %s.txt: %s\n", bin
->ucdFile
, u_errorName(*pErrorCode
));
395 for(i
=0; i
<ignoredPropsCount
; ++i
) {
396 printf("genprops: ignoring property %s in %s.txt\n", ignoredProps
[i
], bin
->ucdFile
);
401 /* -------------------------------------------------------------------------- */
404 initAdditionalProperties() {
405 pv
=upvec_open(UPROPS_VECTOR_WORDS
, 20000);
409 exitAdditionalProperties() {
415 generateAdditionalProperties(char *filename
, const char *suffix
, UErrorCode
*pErrorCode
) {
418 basename
=filename
+uprv_strlen(filename
);
420 /* process various UCD .txt files */
422 /* add Han numeric types & values */
423 parseMultiFieldFile(filename
, basename
, "DerivedNumericValues", suffix
, 2, numericLineFn
, pErrorCode
);
425 parseTwoFieldFile(filename
, basename
, "DerivedAge", suffix
, ageLineFn
, pErrorCode
);
430 * "Common - For characters that may be used
431 * within multiple scripts,
432 * or any unassigned code points."
435 * "The value COMMON is the default value,
436 * given to all code points that are not
437 * explicitly mentioned in the data file."
439 * COMMON==USCRIPT_COMMON==0 - nothing to do
441 parseSingleEnumFile(filename
, basename
, suffix
, &scriptSingleEnum
, pErrorCode
);
443 parseSingleEnumFile(filename
, basename
, suffix
, &blockSingleEnum
, pErrorCode
);
445 parseBinariesFile(filename
, basename
, suffix
, &propListBinaries
, pErrorCode
);
447 parseBinariesFile(filename
, basename
, suffix
, &derCorePropsBinaries
, pErrorCode
);
449 parseSingleEnumFile(filename
, basename
, suffix
, &graphemeClusterBreakSingleEnum
, pErrorCode
);
451 parseSingleEnumFile(filename
, basename
, suffix
, &wordBreakSingleEnum
, pErrorCode
);
453 parseSingleEnumFile(filename
, basename
, suffix
, &sentenceBreakSingleEnum
, pErrorCode
);
456 * LineBreak-4.0.0.txt:
457 * - All code points, assigned and unassigned, that are not listed
458 * explicitly are given the value "XX".
460 * XX==U_LB_UNKNOWN==0 - nothing to do
462 parseSingleEnumFile(filename
, basename
, suffix
, &lineBreakSingleEnum
, pErrorCode
);
465 * Preset East Asian Width defaults:
467 * http://www.unicode.org/reports/tr11/#Unassigned
468 * 7.1 Unassigned and Private Use characters
470 * All unassigned characters are by default classified as non-East Asian neutral,
471 * except for the range U+20000 to U+2FFFD,
472 * since all code positions from U+20000 to U+2FFFD are intended for CJK ideographs (W).
473 * All Private use characters are by default classified as ambiguous,
474 * since their definition depends on context.
476 * N for all ==0 - nothing to do
480 *pErrorCode
=U_ZERO_ERROR
;
481 if( !upvec_setValue(pv
, 0xe000, 0xf900, 0, (uint32_t)(U_EA_AMBIGUOUS
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
) ||
482 !upvec_setValue(pv
, 0xf0000, 0xffffe, 0, (uint32_t)(U_EA_AMBIGUOUS
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
) ||
483 !upvec_setValue(pv
, 0x100000, 0x10fffe, 0, (uint32_t)(U_EA_AMBIGUOUS
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
) ||
484 !upvec_setValue(pv
, 0x20000, 0x2fffe, 0, (uint32_t)(U_EA_WIDE
<<UPROPS_EA_SHIFT
), UPROPS_EA_MASK
, pErrorCode
)
486 fprintf(stderr
, "genprops: unable to set default East Asian Widths: %s\n", u_errorName(*pErrorCode
));
490 /* parse EastAsianWidth.txt */
491 parseSingleEnumFile(filename
, basename
, suffix
, &eawSingleEnum
, pErrorCode
);
493 trie
=utrie_open(NULL
, NULL
, 50000, 0, 0, TRUE
);
495 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
500 pvCount
=upvec_compact(pv
, upvec_compactToTrieHandler
, trie
, pErrorCode
);
501 if(U_FAILURE(*pErrorCode
)) {
502 fprintf(stderr
, "genprops error: unable to build trie for additional properties: %s\n", u_errorName(*pErrorCode
));
507 /* DerivedAge.txt ----------------------------------------------------------- */
509 static void U_CALLCONV
510 ageLineFn(void *context
,
511 char *fields
[][2], int32_t fieldCount
,
512 UErrorCode
*pErrorCode
) {
514 uint32_t value
, start
, limit
, version
;
516 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
517 if(U_FAILURE(*pErrorCode
)) {
518 fprintf(stderr
, "genprops: syntax error in DerivedAge.txt field 0 at %s\n", fields
[0][0]);
523 /* ignore "unassigned" (the default is already set to 0.0) */
524 s
=(char *)u_skipWhitespace(fields
[1][0]);
525 if(0==uprv_strncmp(s
, "unassigned", 10)) {
529 /* parse version number */
530 value
=(uint32_t)uprv_strtoul(s
, &end
, 10);
531 if(s
==end
|| value
==0 || value
>15 || (*end
!='.' && *end
!=' ' && *end
!='\t' && *end
!=0)) {
532 fprintf(stderr
, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields
[1][0]);
533 *pErrorCode
=U_PARSE_ERROR
;
538 /* parse minor version number */
540 s
=(char *)u_skipWhitespace(end
+1);
541 value
=(uint32_t)uprv_strtoul(s
, &end
, 10);
542 if(s
==end
|| value
>15 || (*end
!=' ' && *end
!='\t' && *end
!=0)) {
543 fprintf(stderr
, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields
[1][0]);
544 *pErrorCode
=U_PARSE_ERROR
;
550 if(!upvec_setValue(pv
, start
, limit
, 0, version
<<UPROPS_AGE_SHIFT
, UPROPS_AGE_MASK
, pErrorCode
)) {
551 fprintf(stderr
, "genprops error: unable to set character age: %s\n", u_errorName(*pErrorCode
));
556 /* DerivedNumericValues.txt ------------------------------------------------- */
558 static void U_CALLCONV
559 numericLineFn(void *context
,
560 char *fields
[][2], int32_t fieldCount
,
561 UErrorCode
*pErrorCode
) {
562 Props newProps
={ 0 };
564 uint32_t start
, limit
, value
, oldProps32
;
569 /* get the code point range */
570 u_parseCodePointRange(fields
[0][0], &start
, &limit
, pErrorCode
);
571 if(U_FAILURE(*pErrorCode
)) {
572 fprintf(stderr
, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields
[0][0]);
577 /* check if the numeric value is a fraction (this code does not handle any) */
579 s
=uprv_strchr(fields
[1][0], '.');
582 while('0'<=(c
=*end
++) && c
<='9') {
593 /* parse numeric value */
594 s
=(char *)u_skipWhitespace(fields
[1][0]);
596 /* try large powers of 10 first, may otherwise overflow strtoul() */
597 if(0==uprv_strncmp(s
, "10000000000", 11)) {
598 /* large powers of 10 are encoded in a special way, see store.c */
602 while(*(++end
)=='0') {
606 newProps
.exponent
=exp
;
608 /* normal number parsing */
609 value
=(uint32_t)uprv_strtoul(s
, &end
, 10);
611 if(end
<=s
|| (*end
!='.' && u_skipWhitespace(end
)!=fields
[1][1]) || value
>=0x80000000) {
612 fprintf(stderr
, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields
[0][0]);
618 * Unicode 4.0.1 removes the third column that used to list the numeric type.
619 * Assume that either the data is the same as in UnicodeData.txt,
620 * or else that the numeric type is "numeric".
621 * This should work because we only expect to add numeric values for
622 * Han characters; for those, UnicodeData.txt lists only ranges without
623 * specific properties for single characters.
626 /* set the new numeric type and value */
627 newProps
.numericType
=(uint8_t)U_NT_NUMERIC
; /* assumed numeric type, see Unicode 4.0.1 comment */
628 newProps
.numericValue
=(int32_t)value
; /* newly parsed numeric value */
629 /* the exponent may have been set above */
630 value
=makeProps(&newProps
);
632 for(; start
<limit
; ++start
) {
633 oldProps32
=getProps(start
);
634 oldType
=(int32_t)GET_NUMERIC_TYPE(oldProps32
);
638 /* this code point was already listed with its numeric value in UnicodeData.txt */
641 fprintf(stderr
, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n", fields
[1][0]);
647 * For simplicity, and because we only expect to set numeric values for Han characters,
648 * for now we only allow to set these values for Lo characters.
650 if(oldType
==0 && GET_CATEGORY(oldProps32
)!=U_OTHER_LETTER
) {
651 fprintf(stderr
, "genprops error: new numeric value for a character other than Lo in DerivedNumericValues.txt at %s\n", fields
[0][0]);
655 /* verify that we do not change an existing value (fractions were excluded above) */
657 /* the code point already has a value stored */
658 if((oldProps32
&0xff00)!=(value
&0xff00)) {
659 fprintf(stderr
, "genprops error: new numeric value differs from old one for U+%04lx\n", (long)start
);
662 /* same value, continue */
664 /* the code point is getting a new numeric value */
666 printf("adding U+%04x numeric type %d value 0x%04x from %s\n", (int)start
, U_NT_NUMERIC
, (int)value
, fields
[0][0]);
669 addProps(start
, value
|GET_CATEGORY(oldProps32
));
674 /* data serialization ------------------------------------------------------- */
677 writeAdditionalData(FILE *f
, uint8_t *p
, int32_t capacity
, int32_t indexes
[UPROPS_INDEX_COUNT
]) {
679 UErrorCode errorCode
;
681 errorCode
=U_ZERO_ERROR
;
682 length
=utrie_serialize(trie
, p
, capacity
, NULL
, TRUE
, &errorCode
);
683 if(U_FAILURE(errorCode
)) {
684 fprintf(stderr
, "genprops error: unable to serialize trie for additional properties: %s\n", u_errorName(errorCode
));
689 printf("size in bytes of additional props trie:%5u\n", (int)length
);
692 UTrie trie2
={ NULL
};
693 utrie_unserialize(&trie2
, p
, length
, &errorCode
);
694 if(U_FAILURE(errorCode
)) {
697 "genprops error: failed to utrie_unserialize(trie for additional properties) - %s\n",
698 u_errorName(errorCode
));
701 usrc_writeUTrieArrays(f
,
702 "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL
,
705 usrc_writeUTrieStruct(f
,
706 "static const UTrie propsVectorsTrie={\n",
707 &trie2
, "propsVectorsTrie_index", NULL
, NULL
,
715 indexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
]=
716 indexes
[UPROPS_ADDITIONAL_TRIE_INDEX
]+length
/4;
717 indexes
[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX
]=UPROPS_VECTOR_WORDS
;
718 indexes
[UPROPS_RESERVED_INDEX
]=
719 indexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
]+pvCount
;
721 indexes
[UPROPS_MAX_VALUES_INDEX
]=
722 (((int32_t)U_LB_COUNT
-1)<<UPROPS_LB_SHIFT
)|
723 (((int32_t)U_EA_COUNT
-1)<<UPROPS_EA_SHIFT
)|
724 (((int32_t)UBLOCK_COUNT
-1)<<UPROPS_BLOCK_SHIFT
)|
725 ((int32_t)USCRIPT_CODE_LIMIT
-1);
726 indexes
[UPROPS_MAX_VALUES_2_INDEX
]=
727 (((int32_t)U_SB_COUNT
-1)<<UPROPS_SB_SHIFT
)|
728 (((int32_t)U_WB_COUNT
-1)<<UPROPS_WB_SHIFT
)|
729 (((int32_t)U_GCB_COUNT
-1)<<UPROPS_GCB_SHIFT
)|
730 ((int32_t)U_DT_COUNT
-1);
733 if(p
!=NULL
&& (pvCount
*4)<=capacity
) {
736 "static const uint32_t propsVectors[%ld]={\n",
739 fprintf(f
, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount
);
740 fprintf(f
, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes
[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX
]);
742 uprv_memcpy(p
, pv
, pvCount
*4);
745 printf("number of additional props vectors: %5u\n", (int)pvCount
/UPROPS_VECTOR_WORDS
);
746 printf("number of 32-bit words per vector: %5u\n", UPROPS_VECTOR_WORDS
);