X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..46f4442e9a5a4f3b98b7c1083586332f6a8a99a4:/icuSources/tools/genprops/store.c diff --git a/icuSources/tools/genprops/store.c b/icuSources/tools/genprops/store.c index 949deedd..4e642d4d 100644 --- a/icuSources/tools/genprops/store.c +++ b/icuSources/tools/genprops/store.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2003, International Business Machines +* Copyright (C) 1999-2008, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -18,15 +18,14 @@ */ #include -#include #include "unicode/utypes.h" #include "unicode/uchar.h" #include "cmemory.h" #include "cstring.h" -#include "filestrm.h" #include "utrie.h" #include "unicode/udata.h" #include "unewdata.h" +#include "writesrc.h" #include "uprops.h" #include "genprops.h" @@ -42,7 +41,19 @@ the udata API for loading ICU data. Especially, a UDataInfo structure precedes the actual data. It contains platform properties values and the file format version. -The following is a description of format version 3 . +The following is a description of format version 5 . + +The format changes between version 3 and 4 because the properties related to +case mappings and bidi/shaping are pulled out into separate files +for modularization. +In order to reduce the need for code changes, some of the previous data +structures are omitted, rather than rearranging everything. + +For details see "Changes in format version 4" below. + +Format version 5 became necessary because the bit field for script codes +overflowed. Several bit fields got rearranged, and three (Script, Block, +Word_Break) got widened by one bit each. Data contents: @@ -63,6 +74,10 @@ Formally, the file contains the following structures: const int32_t indexes[16] with values i0..i15: + i0 indicates the length of the main trie. + i0..i3 all have the same value in format version 4.0; + the related props32[] and exceptions[] and uchars[] were used in format version 3 + i0 propsIndex; -- 32-bit unit index to the table of 32-bit properties words i1 exceptionsIndex; -- 32-bit unit index to the table of 32-bit exception words i2 exceptionsTopIndex; -- 32-bit unit index to the array of UChars for special mappings @@ -74,12 +89,14 @@ Formally, the file contains the following structures: i6 reservedItemIndex; -- 32-bit unit index to the top of the properties vectors table i7..i9 reservedIndexes; -- reserved values; 0 for now - i10 maxValues; -- maximum code values for vector word 0, see uprops.h (format version 3.1+) - i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (format version 3.2) + i10 maxValues; -- maximum code values for vector word 0, see uprops.h (new in format version 3.1+) + i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (new in format version 3.2) i12..i15 reservedIndexes; -- reserved values; 0 for now PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) + P, E, and U are not used (empty) in format version 4 + P const uint32_t props32[i1-i0]; E const uint32_t exceptions[i2-i1]; U const UChar uchars[2*(i3-i2)]; @@ -99,14 +116,7 @@ the Unicode code assignment are exploited: The lookup of properties for a given code point is done with a trie lookup, using the UTrie implementation. -The trie lookup result is a 16-bit index in the props32[] table where the -actual 32-bit properties word is stored. This is done to save space. - -(There are thousands of 16-bit entries in the trie data table, but -only a few hundred unique 32-bit properties words. -If the trie data table contained 32-bit words directly, then that would be -larger because the length of the table would be the same as now but the -width would be 32 bits instead of 16. This saves more than 10kB.) +The trie lookup result is a 16-bit properties word. With a given Unicode code point @@ -114,141 +124,51 @@ With a given Unicode code point and 0<=c<0x110000, the lookup is done like this: - uint16_t i; - UTRIE_GET16(c, i); - uint32_t props=p32[i]; - -For some characters, not all of the properties can be efficiently encoded -using 32 bits. For them, the 32-bit word contains an index into the exceptions[] -array: - - if(props&EXCEPTION_BIT)) { - uint16_t e=(uint16_t)(props>>VALUE_SHIFT); - ... - } - -The exception values are a variable number of uint32_t starting at - - const uint32_t *pe=p32+exceptionsIndex+e; - -The first uint32_t there contains flags about what values actually follow it. -Some of the exception values are UChar32 code points for the case mappings, -others are numeric values etc. + uint16_t props; + UTRIE_GET16(trie, c, props); -32-bit properties sets: - -Each 32-bit properties word contains: +Each 16-bit properties word contains: 0.. 4 general category - 5 has exception values - 6..10 BiDi category -11 is mirrored -12..14 numericType: - 0 no numeric value - 1 decimal digit value - 2 digit value - 3 numeric value - ### TODO: type 4 for Han digits & numbers?! -15..19 reserved -20..31 value according to bits 0..5: - if(has exception) { - exception index; - } else switch(general category) { - case Ll: delta to uppercase; -- same as titlecase - case Lu: -delta to lowercase; -- titlecase is same as c - case Lt: -delta to lowercase; -- uppercase is same as c - default: - if(is mirrored) { - delta to mirror; - } else if(numericType!=0) { - numericValue; - } else { - 0; - }; - } - -Exception values: - -In the first uint32_t exception word for a code point, -bits -31..16 reserved -15..0 flags that indicate which values follow: - -bit - 0 has uppercase mapping - 1 has lowercase mapping - 2 has titlecase mapping - 3 unused - 4 has numeric value (numerator) - if numericValue=0x7fffff00+x then numericValue=10^x - 5 has denominator value - 6 has a mirror-image Unicode code point - 7 has SpecialCasing.txt entries - 8 has CaseFolding.txt entries - -According to the flags in this word, one or more uint32_t words follow it -in the sequence of the bit flags in the flags word; if a flag is not set, -then the value is missing or 0: - -For the case mappings and the mirror-image Unicode code point, -one uint32_t or UChar32 each is the code point. -If the titlecase mapping is missing, then it is the same as the uppercase mapping. - -For the digit values, bits 31..16 contain the decimal digit value, and -bits 15..0 contain the digit value. A value of -1 indicates that -this value is missing. - -For the numeric/numerator value, an int32_t word contains the value directly, -except for when there is no numerator but a denominator, then the numerator -is implicitly 1. This means: - numerator denominator result - none none none - x none x - none y 1/y - x y x/y - -If the numerator value is 0x7fffff00+x then it is replaced with 10^x. - -For the denominator value, a uint32_t word contains the value directly. - -For special casing mappings, the 32-bit exception word contains: -31 if set, this character has complex, conditional mappings - that are not stored; - otherwise, the mappings are stored according to the following bits -30..24 number of UChars used for mappings -23..16 reserved -15.. 0 UChar offset from the beginning of the UChars array where the - UChars for the special case mappings are stored in the following format: - -Format of special casing UChars: -One UChar value with lengths as follows: -14..10 number of UChars for titlecase mapping - 9.. 5 number of UChars for uppercase mapping - 4.. 0 number of UChars for lowercase mapping - -Followed by the UChars for lowercase, uppercase, titlecase mappings in this order. - -For case folding mappings, the 32-bit exception word contains: -31..24 number of UChars used for the full mapping -23..16 reserved -15.. 0 UChar offset from the beginning of the UChars array where the - UChars for the special case mappings are stored in the following format: - -Format of case folding UChars: -Two UChars contain the simple mapping as follows: - 0, 0 no simple mapping - BMP,0 a simple mapping to a BMP code point - s1, s2 a simple mapping to a supplementary code point stored as two surrogates -This is followed by the UChars for the full case folding mappings. - -Example: -U+2160, ROMAN NUMERAL ONE, needs an exception because it has a lowercase -mapping and a numeric value. -Its exception values would be stored as 3 uint32_t words: - -- flags=0x0a (see above) with combining class 0 -- lowercase mapping 0x2170 -- numeric value=1 + 5.. 7 numeric type + non-digit numbers are stored with multiple types and pseudo-types + in order to facilitate compact encoding: + 0 no numeric value (0) + 1 decimal digit value (0..9) + 2 digit value (0..9) + 3 (U_NT_NUMERIC) normal non-digit numeric value 0..0xff + 4 (internal type UPROPS_NT_FRACTION) fraction + 5 (internal type UPROPS_NT_LARGE) large number >0xff + 6..7 reserved + + when returning the numeric type from a public API, + internal types must be turned into U_NT_NUMERIC + + 8..15 numeric value + encoding of fractions and large numbers see below + +Fractions: + // n is the 8-bit numeric value from bits 8..15 of the trie word (shifted down) + int32_t num, den; + num=n>>3; // num=0..31 + den=(n&7)+2; // den=2..9 + if(num==0) { + num=-1; // num=-1 or 1..31 + } + double result=(double)num/(double)den; + +Large numbers: + // n is the 8-bit numeric value from bits 8..15 of the trie word (shifted down) + int32_t m, e; + m=n>>4; // m=0..15 + e=(n&0xf); + if(m==0) { + m=1; // for large powers of 10 + e+=18; // e=18..33 + } else { + e+=2; // e=2..17 + } // m==10..15 are reserved + double result=(double)m*10^e; --- Additional properties (new in format version 2.1) --- @@ -277,6 +197,39 @@ See i10 maxValues above, contains only UBLOCK_COUNT and USCRIPT_CODE_LIMIT. - i10 also contains U_LB_COUNT and U_EA_COUNT. - i11 contains maxValues2 for vector word 2. +--- Changes in format version 4 --- + +The format changes between version 3 and 4 because the properties related to +case mappings and bidi/shaping are pulled out into separate files +for modularization. +In order to reduce the need for code changes, some of the previous data +structures are omitted, rather than rearranging everything. + +(The change to format version 4 is for ICU 3.4. The last CVS revision of +genprops/store.c for format version 3.2 is 1.48.) + +The main trie's data is significantly simplified: +- The trie's 16-bit data word is used directly instead of as an index + into props32[]. +- The trie uses the default trie folding functions instead of custom ones. +- Numeric values are stored directly in the trie data word, with special + encodings. +- No more exception data (the data that needed it was pulled out, or, in the + case of numeric values, encoded differently). +- No more string data (pulled out - was for case mappings). + +Also, some of the previously used properties vector bits are reserved again. + +The indexes[] values for the omitted structures are still filled in +(indicating zero-length arrays) so that the swapper code remains unchanged. + +--- Changes in format version 5 --- + +Rearranged bit fields in the second trie (AT) because the script code field +overflowed. Old code would have seen nonsensically low values for new, higher +script codes. +Modified bit fields in icu/source/common/uprops.h + ----------------------------------------------------------------------------- */ /* UDataInfo cf. udata.h */ @@ -290,46 +243,12 @@ static UDataInfo dataInfo={ 0, { 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */ - { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ - { 4, 0, 0, 0 } /* dataVersion */ -}; - -/* definitions of expected data size limits */ -enum { - MAX_PROPS_COUNT=25000, - MAX_UCHAR_COUNT=10000 + { 5, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ + { 5, 1, 0, 0 } /* dataVersion */ }; static UNewTrie *pTrie=NULL; -/* props32[] contains unique properties words after compacting the array of properties */ -static uint32_t props32[MAX_PROPS_COUNT]; - -/* context pointer for compareProps() - temporarily holds a pointer to the trie data */ -static uint32_t *props; - -/* length of props32[] after compaction */ -static int32_t propsTop; - -/* exceptions values */ -static uint32_t exceptions[UPROPS_MAX_EXCEPTIONS_COUNT+20]; -static uint16_t exceptionsTop=0; - -/* Unicode characters, e.g. for special casing or decomposition */ -static UChar uchars[MAX_UCHAR_COUNT+20]; -static uint32_t ucharsTop=0; - -/* statistics */ -static uint16_t exceptionsCount=0; - -/* prototypes --------------------------------------------------------------- */ - -static int -compareProps(const void *l, const void *r); - -static uint32_t -addUChars(const UChar *s, uint32_t length); - /* -------------------------------------------------------------------------- */ extern void @@ -341,265 +260,109 @@ setUnicodeVersion(const char *v) { extern void initStore() { - pTrie=utrie_open(NULL, NULL, MAX_PROPS_COUNT, 0, TRUE); + pTrie=utrie_open(NULL, NULL, 40000, 0, 0, TRUE); if(pTrie==NULL) { fprintf(stderr, "error: unable to create a UNewTrie\n"); exit(U_MEMORY_ALLOCATION_ERROR); } - uprv_memset(props32, 0, sizeof(props32)); initAdditionalProperties(); } +extern void +exitStore() { + utrie_close(pTrie); + exitAdditionalProperties(); +} + +static uint32_t printNumericTypeValueError(Props *p) { + fprintf(stderr, "genprops error: unable to encode numeric type & value %d %ld/%lu E%d\n", + (int)p->numericType, (long)p->numericValue, (unsigned long)p->denominator, p->exponent); + exit(U_ILLEGAL_ARGUMENT_ERROR); + return 0; +} + /* store a character's properties ------------------------------------------- */ extern uint32_t makeProps(Props *p) { - uint32_t x; - int32_t value; - uint16_t count; - UBool isNumber; - - /* - * Simple ideas for reducing the number of bits for one character's - * properties: - * - * Some fields are only used for characters of certain - * general categories: - * - casing fields for letters and others, not for - * numbers & Mn - * + uppercase not for uppercase letters - * + lowercase not for lowercase letters - * + titlecase not for titlecase letters - * - * * most of the time, uppercase=titlecase - * - numeric fields for various digit & other types - * - canonical combining classes for non-spacing marks (Mn) - * * the above is not always true, for all three cases - * - * Using the same bits for alternate fields saves some space. - * - * For the canonical categories, there are only few actually used - * most of the time. - * They can be stored using 5 bits. - * - * In the BiDi categories, the 5 explicit codes are only ever - * assigned 1:1 to 5 well-known code points. Storing only one - * value for all "explicit codes" gets this down to 4 bits. - * Client code then needs to check for this special value - * and replace it by the real one using a 5-element table. - * - * The general categories Mn & Me, non-spacing & enclosing marks, - * are always NSM, and NSM are always of those categories. - * - * Digit values can often be derived from the code point value - * itself in a simple way. - * - */ - - /* count the case mappings and other values competing for the value bit field */ - x=0; - value=0; - count=0; - isNumber= (UBool)(genCategoryNames[p->generalCategory][0]=='N'); - - if(p->upperCase!=0) { - /* verify that no numbers and no Mn have case mappings */ - if(p->generalCategory==U_LOWERCASE_LETTER) { - value=(int32_t)p->code-(int32_t)p->upperCase; - } else { - x=UPROPS_EXCEPTION_BIT; + uint32_t den; + int32_t type, value, exp; + + /* encode numeric type & value */ + type=p->numericType; + value=p->numericValue; + den=p->denominator; + exp=p->exponent; + + if(den!=0) { + /* fraction */ + if( type!=U_NT_NUMERIC || + value<-1 || value==0 || value>UPROPS_FRACTION_MAX_NUM || + denlowerCase!=0) { - /* verify that no numbers and no Mn have case mappings */ - if(p->generalCategory==U_UPPERCASE_LETTER || p->generalCategory==U_TITLECASE_LETTER) { - value=(int32_t)p->lowerCase-(int32_t)p->code; - } else { - x=UPROPS_EXCEPTION_BIT; + type=UPROPS_NT_FRACTION; + + if(value==-1) { + value=0; } - ++count; - } - if(p->upperCase!=p->titleCase) { - x=UPROPS_EXCEPTION_BIT; - ++count; - } - if(p->numericType!=0) { - value=p->numericValue; - ++count; - } - if(p->denominator!=0) { - x=UPROPS_EXCEPTION_BIT; - ++count; - } - if(p->isMirrored) { - if(p->mirrorMapping!=0) { - value=(int32_t)p->mirrorMapping-(int32_t)p->code; + den-=UPROPS_FRACTION_DEN_OFFSET; + value=(value<specialCasing!=NULL) { - x=UPROPS_EXCEPTION_BIT; - ++count; - } - if(p->caseFolding!=NULL) { - x=UPROPS_EXCEPTION_BIT; - ++count; - } + type=UPROPS_NT_LARGE; - /* handle exceptions */ - if(count>1 || x!=0 || valuecode); - */ - } else if(valuecode, (long)value, (long)UPROPS_MIN_VALUE, (long)UPROPS_MAX_VALUE); - } else { - printf("*** U+%04x needs an exception because it has %u values\n", p->code, count); + if(exp<=UPROPS_LARGE_MAX_EXP) { + /* 1..9 * 10^(2..17) */ + exp-=UPROPS_LARGE_EXP_OFFSET; + } else { + /* 1 * 10^(18..33) */ + if(value!=1) { + return printNumericTypeValueError(p); } + value=0; + exp-=UPROPS_LARGE_EXP_OFFSET_EXTRA; + } + value=(value<UPROPS_MAX_SMALL_NUMBER) { + /* large value */ + if(type!=U_NT_NUMERIC) { + return printNumericTypeValueError(p); } + type=UPROPS_NT_LARGE; - ++exceptionsCount; - x=UPROPS_EXCEPTION_BIT; + /* split the value into mantissa and exponent, base 10 */ + while((value%10)==0) { + value/=10; + ++exp; + } + if(value>9) { + return printNumericTypeValueError(p); + } - /* allocate and create exception values */ - value=exceptionsTop; - if(value>=UPROPS_MAX_EXCEPTIONS_COUNT) { - fprintf(stderr, "genprops: out of exceptions memory at U+%06x. (%d exceeds allocated space)\n", - p->code, value); - exit(U_MEMORY_ALLOCATION_ERROR); - } else { - uint32_t first=0; - uint16_t length=1; + exp-=UPROPS_LARGE_EXP_OFFSET; + value=(value<upperCase!=0) { - first|=1; - exceptions[value+length++]=p->upperCase; - } - if(p->lowerCase!=0) { - first|=2; - exceptions[value+length++]=p->lowerCase; - } - if(p->upperCase!=p->titleCase) { - first|=4; - if(p->titleCase!=0) { - exceptions[value+length++]=p->titleCase; - } else { - exceptions[value+length++]=p->code; - } - } - if(p->numericType!=0) { - if(p->denominator==0) { - first|=0x10; - exceptions[value+length++]=(uint32_t)p->numericValue; - } else { - if(p->numericValue!=1) { - first|=0x10; - exceptions[value+length++]=(uint32_t)p->numericValue; - } - first|=0x20; - exceptions[value+length++]=p->denominator; - } - } - if(p->isMirrored) { - first|=0x40; - exceptions[value+length++]=p->mirrorMapping; - } - if(p->specialCasing!=NULL) { - first|=0x80; - if(p->specialCasing->isComplex) { - /* complex special casing */ - exceptions[value+length++]=0x80000000; - } else { - /* unconditional special casing */ - UChar u[128]; - uint32_t i; - uint16_t j, entry; - - i=1; - entry=0; - j=p->specialCasing->lowerCase[0]; - if(j>0) { - uprv_memcpy(u+1, p->specialCasing->lowerCase+1, 2*j); - i+=j; - entry=j; - } - j=p->specialCasing->upperCase[0]; - if(j>0) { - uprv_memcpy(u+i, p->specialCasing->upperCase+1, 2*j); - i+=j; - entry|=j<<5; - } - j=p->specialCasing->titleCase[0]; - if(j>0) { - uprv_memcpy(u+i, p->specialCasing->titleCase+1, 2*j); - i+=j; - entry|=j<<10; - } - u[0]=entry; - - exceptions[value+length++]=(i<<24)|addUChars(u, i); - } - } - if(p->caseFolding!=NULL) { - first|=0x100; - if(p->caseFolding->simple==0 && p->caseFolding->full[0]==0) { - /* special case folding, store only a marker */ - exceptions[value+length++]=0; - } else { - /* normal case folding with a simple and a full mapping */ - UChar u[128]; - uint16_t i; - - /* store the simple mapping into the first two UChars */ - i=0; - u[1]=0; - UTF_APPEND_CHAR_UNSAFE(u, i, p->caseFolding->simple); - - /* store the full mapping after that */ - i=p->caseFolding->full[0]; - if(i>0) { - uprv_memcpy(u+2, p->caseFolding->full+1, 2*i); - } - - exceptions[value+length++]=(i<<24)|addUChars(u, 2+i); - } - } - exceptions[value]=first; - exceptionsTop+=length; - } + /* } else normal value=0..0xff { */ } - /* put together the 32-bit word of encoded properties */ - x|= + /* encode the properties */ + return (uint32_t)p->generalCategory | - (uint32_t)p->bidi<isMirrored<numericType<0;) { - --i; - map[i]=(uint16_t)i; - } - - /* reorder */ - qsort(map, propsTop, 2, compareProps); - - /* - * Now invert the reordered table and compact it in the same step. - * The result will be props32[] having only unique properties words - * and stage3[] having indexes to them. - */ - newIndex=0; - for(i=0; i>2; - indexes[UPROPS_PROPS32_INDEX]=offset; /* uint32_t offset to props[] */ - - offset+=propsTop; - indexes[UPROPS_EXCEPTIONS_INDEX]=offset;/* uint32_t offset to exceptions[] */ - - offset+=exceptionsTop; /* uint32_t offset to the first unit after exceptions[] */ - indexes[UPROPS_EXCEPTIONS_TOP_INDEX]=offset; - - /* round up UChar count to 4-alignement */ - ucharsTop=(ucharsTop+1)&~1; - offset+=(uint16_t)(ucharsTop/2); /* uint32_t offset to the first unit after uchars[] */ + indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */ + indexes[UPROPS_EXCEPTIONS_INDEX]= /* structures from the old format version 3 */ + indexes[UPROPS_EXCEPTIONS_TOP_INDEX]= /* so that less runtime code has to be changed */ indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset; if(beVerbose) { - printf("trie size in bytes: %5u\n", trieSize); - printf("number of unique properties values: %5u\n", propsTop); - printf("number of code points with exceptions: %5u\n", exceptionsCount); - printf("size in bytes of exceptions: %5u\n", 4*exceptionsTop); - printf("number of UChars for special mappings: %5u\n", ucharsTop); + printf("trie size in bytes: %5u\n", (int)trieSize); } - additionalPropsSize=writeAdditionalData(additionalProps, sizeof(additionalProps), indexes); + if(csource) { + /* write .c file for hardcoded data */ + UTrie trie={ NULL }; + FILE *f; + + utrie_unserialize(&trie, trieBlock, trieSize, &errorCode); + if(U_FAILURE(errorCode)) { + fprintf( + stderr, + "genprops error: failed to utrie_unserialize(uprops.icu main trie) - %s\n", + u_errorName(errorCode)); + return; + } - size=4*offset+additionalPropsSize; /* total size of data */ - if(beVerbose) { - printf("data size: %6lu\n", (unsigned long)size); - } + f=usrc_create(dataDir, "uchar_props_data.c"); + if(f!=NULL) { + usrc_writeArray(f, + "static const UVersionInfo formatVersion={", + dataInfo.formatVersion, 8, 4, + "};\n\n"); + usrc_writeArray(f, + "static const UVersionInfo dataVersion={", + dataInfo.dataVersion, 8, 4, + "};\n\n"); + usrc_writeUTrieArrays(f, + "static const uint16_t propsTrie_index[%ld]={\n", NULL, + &trie, + "\n};\n\n"); + usrc_writeUTrieStruct(f, + "static const UTrie propsTrie={\n", + &trie, "propsTrie_index", NULL, NULL, + "};\n\n"); + + additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes); + size=4*offset+additionalPropsSize; /* total size of data */ + + usrc_writeArray(f, + "static const int32_t indexes[UPROPS_INDEX_COUNT]={", + indexes, 32, UPROPS_INDEX_COUNT, + "};\n\n"); + fclose(f); + } + } else { + /* write the data */ + pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo, + haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode)); + exit(errorCode); + } - /* write the data */ - pData=udata_create(dataDir, DATA_TYPE, U_ICUDATA_NAME "_" DATA_NAME, &dataInfo, - haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode)); - exit(errorCode); - } + additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes); + size=4*offset+additionalPropsSize; /* total size of data */ - udata_writeBlock(pData, indexes, sizeof(indexes)); - udata_writeBlock(pData, trieBlock, trieSize); - udata_writeBlock(pData, props32, 4*propsTop); - udata_writeBlock(pData, exceptions, 4*exceptionsTop); - udata_writeBlock(pData, uchars, 2*ucharsTop); - udata_writeBlock(pData, additionalProps, additionalPropsSize); + udata_writeBlock(pData, indexes, sizeof(indexes)); + udata_writeBlock(pData, trieBlock, trieSize); + udata_writeBlock(pData, additionalProps, additionalPropsSize); - /* finish up */ - dataLength=udata_finish(pData, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "genprops: error %d writing the output file\n", errorCode); - exit(errorCode); - } + /* finish up */ + dataLength=udata_finish(pData, &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "genprops: error %d writing the output file\n", errorCode); + exit(errorCode); + } - if(dataLength!=(long)size) { - fprintf(stderr, "genprops: data length %ld != calculated size %lu\n", - dataLength, (unsigned long)size); - exit(U_INTERNAL_PROGRAM_ERROR); + if(dataLength!=(long)size) { + fprintf(stderr, "genprops: data length %ld != calculated size %lu\n", + dataLength, (unsigned long)size); + exit(U_INTERNAL_PROGRAM_ERROR); + } } - utrie_close(pTrie); -} - -/* helpers ------------------------------------------------------------------ */ - -static uint32_t -addUChars(const UChar *s, uint32_t length) { - uint32_t top=(uint16_t)(ucharsTop+length); - UChar *p; - - if(top>=MAX_UCHAR_COUNT) { - fprintf(stderr, "genprops: out of UChars memory\n"); - exit(U_MEMORY_ALLOCATION_ERROR); + if(beVerbose) { + printf("data size: %6lu\n", (unsigned long)size); } - p=uchars+ucharsTop; - uprv_memcpy(p, s, 2*length); - ucharsTop=top; - return (uint32_t)(p-uchars); } /*