X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..d5d484b0fbe924d3663b177965538d517ee412c1:/icuSources/tools/genprops/props2.c?ds=sidebyside diff --git a/icuSources/tools/genprops/props2.c b/icuSources/tools/genprops/props2.c index 331f821f..9f18a11b 100644 --- a/icuSources/tools/genprops/props2.c +++ b/icuSources/tools/genprops/props2.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2004, International Business Machines +* Copyright (C) 2002-2006, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -27,6 +27,7 @@ #include "uprops.h" #include "propsvec.h" #include "uparse.h" +#include "writesrc.h" #include "genprops.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -102,11 +103,6 @@ numericLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode); -static void U_CALLCONV -bidiClassLineFn(void *context, - char *fields[][2], int32_t fieldCount, - UErrorCode *pErrorCode); - /* parse files with single enumerated properties ---------------------------- */ struct SingleEnum { @@ -134,6 +130,24 @@ static const SingleEnum blockSingleEnum={ 0, UPROPS_BLOCK_SHIFT, UPROPS_BLOCK_MASK }; +static const SingleEnum graphemeClusterBreakSingleEnum={ + "GraphemeBreakProperty", "Grapheme_Cluster_Break", + UCHAR_GRAPHEME_CLUSTER_BREAK, + 2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK +}; + +static const SingleEnum wordBreakSingleEnum={ + "WordBreakProperty", "Word_Break", + UCHAR_WORD_BREAK, + 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK +}; + +static const SingleEnum sentenceBreakSingleEnum={ + "SentenceBreakProperty", "Sentence_Break", + UCHAR_SENTENCE_BREAK, + 2, UPROPS_SB_SHIFT, UPROPS_SB_MASK +}; + static const SingleEnum lineBreakSingleEnum={ "LineBreak", "line break", UCHAR_LINE_BREAK, @@ -146,18 +160,6 @@ static const SingleEnum eawSingleEnum={ 0, UPROPS_EA_SHIFT, UPROPS_EA_MASK }; -static const SingleEnum jtSingleEnum={ - "DerivedJoiningType", "joining type", - UCHAR_JOINING_TYPE, - 2, UPROPS_JT_SHIFT, UPROPS_JT_MASK -}; - -static const SingleEnum jgSingleEnum={ - "DerivedJoiningGroup", "joining group", - UCHAR_JOINING_GROUP, - 2, UPROPS_JG_SHIFT, UPROPS_JG_MASK -}; - static void U_CALLCONV singleEnumLineFn(void *context, char *fields[][2], int32_t fieldCount, @@ -246,8 +248,6 @@ typedef struct Binaries Binaries; static const Binary propListNames[]={ { "White_Space", 1, UPROPS_WHITE_SPACE }, - { "Bidi_Control", 1, UPROPS_BIDI_CONTROL }, - { "Join_Control", 1, UPROPS_JOIN_CONTROL }, { "Dash", 1, UPROPS_DASH }, { "Hyphen", 1, UPROPS_HYPHEN }, { "Quotation_Mark", 1, UPROPS_QUOTATION_MARK }, @@ -264,12 +264,15 @@ propListNames[]={ { "Radical", 1, UPROPS_RADICAL }, { "Unified_Ideograph", 1, UPROPS_UNIFIED_IDEOGRAPH }, { "Deprecated", 1, UPROPS_DEPRECATED }, - { "Soft_Dotted", 1, UPROPS_SOFT_DOTTED }, { "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION }, /* new properties in Unicode 4.0.1 */ { "STerm", 2, UPROPS_V2_S_TERM }, - { "Variation_Selector", 2, UPROPS_V2_VARIATION_SELECTOR } + { "Variation_Selector", 2, UPROPS_V2_VARIATION_SELECTOR }, + + /* new properties in Unicode 4.1 */ + { "Pattern_Syntax", 2, UPROPS_V2_PATTERN_SYNTAX }, + { "Pattern_White_Space", 2, UPROPS_V2_PATTERN_WHITE_SPACE } }; static const Binaries @@ -285,15 +288,19 @@ derCorePropsNames[]={ /* before Unicode 4/ICU 2.6/format version 3.2, these used to be Other_XYZ from PropList.txt */ { "Math", 1, UPROPS_MATH }, { "Alphabetic", 1, UPROPS_ALPHABETIC }, - { "Lowercase", 1, UPROPS_LOWERCASE }, - { "Uppercase", 1, UPROPS_UPPERCASE }, { "Grapheme_Extend", 1, UPROPS_GRAPHEME_EXTEND }, { "Default_Ignorable_Code_Point", 1, UPROPS_DEFAULT_IGNORABLE_CODE_POINT }, /* new properties bits in ICU 2.6/format version 3.2 */ { "ID_Start", 1, UPROPS_ID_START }, { "ID_Continue", 1, UPROPS_ID_CONTINUE }, - { "Grapheme_Base", 1, UPROPS_GRAPHEME_BASE } + { "Grapheme_Base", 1, UPROPS_GRAPHEME_BASE }, + + /* + * Unicode 5/ICU 3.6 moves Grapheme_Link from PropList.txt + * to DerivedCoreProperties.txt and deprecates it. + */ + { "Grapheme_Link", 1, UPROPS_GRAPHEME_LINK } }; static const Binaries @@ -340,7 +347,9 @@ binariesLineFn(void *context, for(i=0;; ++i) { if(i==bin->binariesCount) { /* ignore unrecognized properties */ - addIgnoredProp(s, fields[1][1]); + if(beVerbose) { + addIgnoredProp(s, fields[1][1]); + } return; } if(isToken(bin->binaries[i].propName, s)) { @@ -382,8 +391,10 @@ parseBinariesFile(char *filename, char *basename, const char *suffix, fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode)); } - for(i=0; iucdFile); + if(beVerbose) { + for(i=0; iucdFile); + } } } @@ -394,6 +405,12 @@ initAdditionalProperties() { pv=upvec_open(UPROPS_VECTOR_WORDS, 20000); } +U_CFUNC void +exitAdditionalProperties() { + utrie_close(trie); + upvec_close(pv); +} + U_CFUNC void generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) { char *basename; @@ -405,9 +422,6 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr /* add Han numeric types & values */ parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode); - /* set proper bidi class for unassigned code points (Cn) */ - parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, pErrorCode); - parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode); /* @@ -432,6 +446,12 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr parseBinariesFile(filename, basename, suffix, &derCorePropsBinaries, pErrorCode); + parseSingleEnumFile(filename, basename, suffix, &graphemeClusterBreakSingleEnum, pErrorCode); + + parseSingleEnumFile(filename, basename, suffix, &wordBreakSingleEnum, pErrorCode); + + parseSingleEnumFile(filename, basename, suffix, &sentenceBreakSingleEnum, pErrorCode); + /* * LineBreak-4.0.0.txt: * - All code points, assigned and unassigned, that are not listed @@ -441,10 +461,6 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr */ parseSingleEnumFile(filename, basename, suffix, &lineBreakSingleEnum, pErrorCode); - parseSingleEnumFile(filename, basename, suffix, &jtSingleEnum, pErrorCode); - - parseSingleEnumFile(filename, basename, suffix, &jgSingleEnum, pErrorCode); - /* * Preset East Asian Width defaults: * @@ -481,7 +497,7 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr return; } - pvCount=upvec_toTrie(pv, trie, pErrorCode); + pvCount=upvec_compact(pv, upvec_compactToTrieHandler, trie, pErrorCode); if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n", u_errorName(*pErrorCode)); exit(*pErrorCode); @@ -504,8 +520,13 @@ ageLineFn(void *context, } ++limit; - /* parse version number */ + /* ignore "unassigned" (the default is already set to 0.0) */ s=(char *)u_skipWhitespace(fields[1][0]); + if(0==uprv_strncmp(s, "unassigned", 10)) { + return; + } + + /* parse version number */ value=(uint32_t)uprv_strtoul(s, &end, 10); if(s==end || value==0 || value>15 || (*end!='.' && *end!=' ' && *end!='\t' && *end!=0)) { fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]); @@ -538,7 +559,7 @@ static void U_CALLCONV numericLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode) { - Props newProps; + Props newProps={ 0 }; char *s, *end; uint32_t start, limit, value, oldProps32; int32_t oldType; @@ -575,11 +596,14 @@ numericLineFn(void *context, /* try large powers of 10 first, may otherwise overflow strtoul() */ if(0==uprv_strncmp(s, "10000000000", 11)) { /* large powers of 10 are encoded in a special way, see store.c */ - value=0x7fffff00; + uint8_t exp=0; + end=s; while(*(++end)=='0') { - ++value; + ++exp; } + value=1; + newProps.exponent=exp; } else { /* normal number parsing */ value=(uint32_t)uprv_strtoul(s, &end, 10); @@ -599,130 +623,93 @@ numericLineFn(void *context, * specific properties for single characters. */ + /* set the new numeric type and value */ + newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */ + newProps.numericValue=(int32_t)value; /* newly parsed numeric value */ + /* the exponent may have been set above */ + value=makeProps(&newProps); + for(; start