X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..d5d484b0fbe924d3663b177965538d517ee412c1:/icuSources/tools/genprops/props2.c diff --git a/icuSources/tools/genprops/props2.c b/icuSources/tools/genprops/props2.c index 8dc53441..9f18a11b 100644 --- a/icuSources/tools/genprops/props2.c +++ b/icuSources/tools/genprops/props2.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2003, International Business Machines +* Copyright (C) 2002-2006, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -27,6 +27,7 @@ #include "uprops.h" #include "propsvec.h" #include "uparse.h" +#include "writesrc.h" #include "genprops.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -102,11 +103,6 @@ numericLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode); -static void U_CALLCONV -bidiClassLineFn(void *context, - char *fields[][2], int32_t fieldCount, - UErrorCode *pErrorCode); - /* parse files with single enumerated properties ---------------------------- */ struct SingleEnum { @@ -134,6 +130,24 @@ static const SingleEnum blockSingleEnum={ 0, UPROPS_BLOCK_SHIFT, UPROPS_BLOCK_MASK }; +static const SingleEnum graphemeClusterBreakSingleEnum={ + "GraphemeBreakProperty", "Grapheme_Cluster_Break", + UCHAR_GRAPHEME_CLUSTER_BREAK, + 2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK +}; + +static const SingleEnum wordBreakSingleEnum={ + "WordBreakProperty", "Word_Break", + UCHAR_WORD_BREAK, + 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK +}; + +static const SingleEnum sentenceBreakSingleEnum={ + "SentenceBreakProperty", "Sentence_Break", + UCHAR_SENTENCE_BREAK, + 2, UPROPS_SB_SHIFT, UPROPS_SB_MASK +}; + static const SingleEnum lineBreakSingleEnum={ "LineBreak", "line break", UCHAR_LINE_BREAK, @@ -146,18 +160,6 @@ static const SingleEnum eawSingleEnum={ 0, UPROPS_EA_SHIFT, UPROPS_EA_MASK }; -static const SingleEnum jtSingleEnum={ - "DerivedJoiningType", "joining type", - UCHAR_JOINING_TYPE, - 2, UPROPS_JT_SHIFT, UPROPS_JT_MASK -}; - -static const SingleEnum jgSingleEnum={ - "DerivedJoiningGroup", "joining group", - UCHAR_JOINING_GROUP, - 2, UPROPS_JG_SHIFT, UPROPS_JG_MASK -}; - static void U_CALLCONV singleEnumLineFn(void *context, char *fields[][2], int32_t fieldCount, @@ -199,7 +201,7 @@ singleEnumLineFn(void *context, uv=(uint32_t)(value<vecShift); if((uv&sen->vecMask)!=uv) { fprintf(stderr, "genprops error: %s value overflow (0x%x) at %s\n", - sen->propName, uv, s); + sen->propName, (int)uv, s); exit(U_INTERNAL_PROGRAM_ERROR); } @@ -246,8 +248,6 @@ typedef struct Binaries Binaries; static const Binary propListNames[]={ { "White_Space", 1, UPROPS_WHITE_SPACE }, - { "Bidi_Control", 1, UPROPS_BIDI_CONTROL }, - { "Join_Control", 1, UPROPS_JOIN_CONTROL }, { "Dash", 1, UPROPS_DASH }, { "Hyphen", 1, UPROPS_HYPHEN }, { "Quotation_Mark", 1, UPROPS_QUOTATION_MARK }, @@ -264,8 +264,15 @@ propListNames[]={ { "Radical", 1, UPROPS_RADICAL }, { "Unified_Ideograph", 1, UPROPS_UNIFIED_IDEOGRAPH }, { "Deprecated", 1, UPROPS_DEPRECATED }, - { "Soft_Dotted", 1, UPROPS_SOFT_DOTTED }, - { "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION } + { "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION }, + + /* new properties in Unicode 4.0.1 */ + { "STerm", 2, UPROPS_V2_S_TERM }, + { "Variation_Selector", 2, UPROPS_V2_VARIATION_SELECTOR }, + + /* new properties in Unicode 4.1 */ + { "Pattern_Syntax", 2, UPROPS_V2_PATTERN_SYNTAX }, + { "Pattern_White_Space", 2, UPROPS_V2_PATTERN_WHITE_SPACE } }; static const Binaries @@ -281,15 +288,19 @@ derCorePropsNames[]={ /* before Unicode 4/ICU 2.6/format version 3.2, these used to be Other_XYZ from PropList.txt */ { "Math", 1, UPROPS_MATH }, { "Alphabetic", 1, UPROPS_ALPHABETIC }, - { "Lowercase", 1, UPROPS_LOWERCASE }, - { "Uppercase", 1, UPROPS_UPPERCASE }, { "Grapheme_Extend", 1, UPROPS_GRAPHEME_EXTEND }, { "Default_Ignorable_Code_Point", 1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT }, /* new properties bits in ICU 2.6/format version 3.2 */ { "ID_Start", 1, UPROPS_ID_START }, { "ID_Continue", 1, UPROPS_ID_CONTINUE }, - { "Grapheme_Base", 1, UPROPS_GRAPHEME_BASE } + { "Grapheme_Base", 1, UPROPS_GRAPHEME_BASE }, + + /* + * Unicode 5/ICU 3.6 moves Grapheme_Link from PropList.txt + * to DerivedCoreProperties.txt and deprecates it. + */ + { "Grapheme_Link", 1, UPROPS_GRAPHEME_LINK } }; static const Binaries @@ -336,7 +347,9 @@ binariesLineFn(void *context, for(i=0;; ++i) { if(i==bin->binariesCount) { /* ignore unrecognized properties */ - addIgnoredProp(s, fields[1][1]); + if(beVerbose) { + addIgnoredProp(s, fields[1][1]); + } return; } if(isToken(bin->binaries[i].propName, s)) { @@ -346,7 +359,7 @@ binariesLineFn(void *context, if(bin->binaries[i].vecShift>=32) { fprintf(stderr, "genprops error: shift value %d>=32 for %s %s\n", - bin->binaries[i].vecShift, bin->ucdFile, bin->binaries[i].propName); + (int)bin->binaries[i].vecShift, bin->ucdFile, bin->binaries[i].propName); exit(U_INTERNAL_PROGRAM_ERROR); } uv=U_MASK(bin->binaries[i].vecShift); @@ -378,8 +391,10 @@ parseBinariesFile(char *filename, char *basename, const char *suffix, fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode)); } - for(i=0; iucdFile); + if(beVerbose) { + for(i=0; iucdFile); + } } } @@ -390,6 +405,12 @@ initAdditionalProperties() { pv=upvec_open(UPROPS_VECTOR_WORDS, 20000); } +U_CFUNC void +exitAdditionalProperties() { + utrie_close(trie); + upvec_close(pv); +} + U_CFUNC void generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) { char *basename; @@ -399,10 +420,7 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr /* process various UCD .txt files */ /* add Han numeric types & values */ - parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 3, numericLineFn, pErrorCode); - - /* set proper bidi class for unassigned code points (Cn) */ - parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, pErrorCode); + parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode); parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode); @@ -428,6 +446,12 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr parseBinariesFile(filename, basename, suffix, &derCorePropsBinaries, pErrorCode); + parseSingleEnumFile(filename, basename, suffix, &graphemeClusterBreakSingleEnum, pErrorCode); + + parseSingleEnumFile(filename, basename, suffix, &wordBreakSingleEnum, pErrorCode); + + parseSingleEnumFile(filename, basename, suffix, &sentenceBreakSingleEnum, pErrorCode); + /* * LineBreak-4.0.0.txt: * - All code points, assigned and unassigned, that are not listed @@ -437,10 +461,6 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr */ parseSingleEnumFile(filename, basename, suffix, &lineBreakSingleEnum, pErrorCode); - parseSingleEnumFile(filename, basename, suffix, &jtSingleEnum, pErrorCode); - - parseSingleEnumFile(filename, basename, suffix, &jgSingleEnum, pErrorCode); - /* * Preset East Asian Width defaults: * @@ -470,14 +490,14 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr /* parse EastAsianWidth.txt */ parseSingleEnumFile(filename, basename, suffix, &eawSingleEnum, pErrorCode); - trie=utrie_open(NULL, NULL, 50000, 0, TRUE); + trie=utrie_open(NULL, NULL, 50000, 0, 0, TRUE); if(trie==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; upvec_close(pv); return; } - pvCount=upvec_toTrie(pv, trie, pErrorCode); + pvCount=upvec_compact(pv, upvec_compactToTrieHandler, trie, pErrorCode); if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n", u_errorName(*pErrorCode)); exit(*pErrorCode); @@ -500,8 +520,13 @@ ageLineFn(void *context, } ++limit; - /* parse version number */ + /* ignore "unassigned" (the default is already set to 0.0) */ s=(char *)u_skipWhitespace(fields[1][0]); + if(0==uprv_strncmp(s, "unassigned", 10)) { + return; + } + + /* parse version number */ value=(uint32_t)uprv_strtoul(s, &end, 10); if(s==end || value==0 || value>15 || (*end!='.' && *end!=' ' && *end!='\t' && *end!=0)) { fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]); @@ -534,10 +559,10 @@ static void U_CALLCONV numericLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode) { - Props newProps; + Props newProps={ 0 }; char *s, *end; uint32_t start, limit, value, oldProps32; - int32_t type, oldType; + int32_t oldType; char c; UBool isFraction; @@ -571,11 +596,14 @@ numericLineFn(void *context, /* try large powers of 10 first, may otherwise overflow strtoul() */ if(0==uprv_strncmp(s, "10000000000", 11)) { /* large powers of 10 are encoded in a special way, see store.c */ - value=0x7fffff00; + uint8_t exp=0; + end=s; while(*(++end)=='0') { - ++value; + ++exp; } + value=1; + newProps.exponent=exp; } else { /* normal number parsing */ value=(uint32_t)uprv_strtoul(s, &end, 10); @@ -586,144 +614,103 @@ numericLineFn(void *context, } } - /* parse numeric type */ - s=trimTerminateField(fields[2][0], fields[2][1]); - type=u_getPropertyValueEnum(UCHAR_NUMERIC_TYPE, s); - if(type<=0) { - fprintf(stderr, "genprops error: unknown numeric type in DerivedNumericValues.txt field 1 at %s\n", s); - exit(U_PARSE_ERROR); - } + /* + * Unicode 4.0.1 removes the third column that used to list the numeric type. + * Assume that either the data is the same as in UnicodeData.txt, + * or else that the numeric type is "numeric". + * This should work because we only expect to add numeric values for + * Han characters; for those, UnicodeData.txt lists only ranges without + * specific properties for single characters. + */ + + /* set the new numeric type and value */ + newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */ + newProps.numericValue=(int32_t)value; /* newly parsed numeric value */ + /* the exponent may have been set above */ + value=makeProps(&newProps); for(; start