X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/51004dcb01e06fef634b61be77ed73dd61cb6db9..5ea0322b6ab2af986e4c764284141380031dd014:/icuSources/tools/genrb/parse.cpp diff --git a/icuSources/tools/genrb/parse.cpp b/icuSources/tools/genrb/parse.cpp index 55464115..34b94aec 100644 --- a/icuSources/tools/genrb/parse.cpp +++ b/icuSources/tools/genrb/parse.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1998-2012, International Business Machines +* Copyright (C) 1998-2015, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -18,7 +18,17 @@ ******************************************************************************* */ -#include "ucol_imp.h" +// Safer use of UnicodeString. +#ifndef UNISTR_FROM_CHAR_EXPLICIT +# define UNISTR_FROM_CHAR_EXPLICIT explicit +#endif + +// Less important, but still a good idea. +#ifndef UNISTR_FROM_STRING_EXPLICIT +# define UNISTR_FROM_STRING_EXPLICIT explicit +#endif + +#include #include "parse.h" #include "errmsg.h" #include "uhash.h" @@ -30,9 +40,22 @@ #include "reslist.h" #include "rbt_pars.h" #include "genrb.h" +#include "unicode/stringpiece.h" +#include "unicode/unistr.h" #include "unicode/ustring.h" #include "unicode/uscript.h" +#include "unicode/utf16.h" #include "unicode/putil.h" +#include "charstr.h" +#include "collationbuilder.h" +#include "collationdata.h" +#include "collationdatareader.h" +#include "collationdatawriter.h" +#include "collationfastlatinbuilder.h" +#include "collationinfo.h" +#include "collationroot.h" +#include "collationruleparser.h" +#include "collationtailoring.h" #include /* Number of tokens to read ahead of the current stream position */ @@ -51,6 +74,13 @@ #define OPENSQBRACKET 0x005B #define CLOSESQBRACKET 0x005D +using icu::CharString; +using icu::LocalMemory; +using icu::LocalPointer; +using icu::LocalUCHARBUFPointer; +using icu::StringPiece; +using icu::UnicodeString; + struct Lookahead { enum ETokenType type; @@ -84,11 +114,11 @@ typedef struct { uint32_t inputdirLength; const char *outputdir; uint32_t outputdirLength; + const char *filename; UBool makeBinaryCollation; + UBool omitCollationRules; } ParseState; -static UBool gOmitCollationRules = FALSE; - typedef struct SResource * ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); @@ -323,7 +353,7 @@ parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct USt } uprv_strcat(filename, cs); - if(gOmitCollationRules) { + if(state->omitCollationRules) { return res_none(); } @@ -494,7 +524,7 @@ parseTransliterator(ParseState* state, char *tag, uint32_t startline, const stru return result; } -static struct SResource* dependencyArray = NULL; +static ArrayResource* dependencyArray = NULL; static struct SResource * parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) @@ -549,7 +579,7 @@ parseDependency(ParseState* state, char *tag, uint32_t startline, const struct U } elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); - array_add(dependencyArray, elem, status); + dependencyArray->add(elem); if (U_FAILURE(*status)) { @@ -624,21 +654,17 @@ parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UStrin return result; } -typedef struct{ - const char* inputDir; - const char* outputDir; -} GenrbData; +#if !UCONFIG_NO_COLLATION + +namespace { static struct SResource* resLookup(struct SResource* res, const char* key){ - struct SResource *current = NULL; - struct SResTable *list; - if (res == res_none()) { + if (res == res_none() || !res->isTable()) { return NULL; } - list = &(res->u.fTable); - - current = list->fFirst; + TableResource *list = static_cast(res); + SResource *current = list->fFirst; while (current != NULL) { if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { return current; @@ -648,51 +674,41 @@ static struct SResource* resLookup(struct SResource* res, const char* key){ return NULL; } -static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){ - struct SRBRoot *data = NULL; - UCHARBUF *ucbuf = NULL; - GenrbData* genrbdata = (GenrbData*) context; - int localeLength = strlen(locale); - char* filename = (char*)uprv_malloc(localeLength+5); - char *inputDirBuf = NULL; - char *openFileName = NULL; - const char* cp = ""; - UChar* urules = NULL; - int32_t urulesLength = 0; - int32_t i = 0; - int32_t dirlen = 0; - int32_t filelen = 0; - struct SResource* root; - struct SResource* collations; - struct SResource* collation; - struct SResource* sequence; - - memcpy(filename, locale, localeLength); - for(i = 0; i < localeLength; i++){ - if(filename[i] == '-'){ - filename[i] = '_'; - } - } - filename[localeLength] = '.'; - filename[localeLength+1] = 't'; - filename[localeLength+2] = 'x'; - filename[localeLength+3] = 't'; - filename[localeLength+4] = 0; +class GenrbImporter : public icu::CollationRuleParser::Importer { +public: + GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {} + virtual ~GenrbImporter(); + virtual void getRules( + const char *localeID, const char *collationType, + UnicodeString &rules, + const char *&errorReason, UErrorCode &errorCode); + +private: + const char *inputDir; + const char *outputDir; +}; +GenrbImporter::~GenrbImporter() {} - if (status==NULL || U_FAILURE(*status)) { - return NULL; +void +GenrbImporter::getRules( + const char *localeID, const char *collationType, + UnicodeString &rules, + const char *& /*errorReason*/, UErrorCode &errorCode) { + CharString filename(localeID, errorCode); + for(int32_t i = 0; i < filename.length(); i++){ + if(filename[i] == '-'){ + filename.data()[i] = '_'; + } } - if(filename==NULL){ - *status=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - }else{ - filelen = (int32_t)uprv_strlen(filename); + filename.append(".txt", errorCode); + if (U_FAILURE(errorCode)) { + return; } - if(genrbdata->inputDir == NULL) { - const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); - openFileName = (char *) uprv_malloc(dirlen + filelen + 2); - openFileName[0] = '\0'; + CharString inputDirBuf; + CharString openFileName; + if(inputDir == NULL) { + const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR); if (filenameBegin != NULL) { /* * When a filename ../../../data/root.txt is specified, @@ -700,35 +716,19 @@ static const UChar* importFromDataFile(void* context, const char* locale, const * This is very important when the resource file includes * another file, like UCARules.txt or thaidict.brk. */ - int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); - inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize); - - /* test for NULL */ - if(inputDirBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto finish; - } - - inputDirBuf[filenameSize - 1] = 0; - genrbdata->inputDir = inputDirBuf; - dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); + StringPiece dir = filename.toStringPiece(); + const char *filenameLimit = filename.data() + filename.length(); + dir.remove_suffix((int32_t)(filenameLimit - filenameBegin)); + inputDirBuf.append(dir, errorCode); + inputDir = inputDirBuf.data(); } }else{ - dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); + int32_t dirlen = (int32_t)uprv_strlen(inputDir); - if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) { - openFileName = (char *) uprv_malloc(dirlen + filelen + 2); - - /* test for NULL */ - if(openFileName == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto finish; - } - - openFileName[0] = '\0'; + if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) { /* * append the input dir to openFileName if the first char in - * filename is not file seperation char and the last char input directory is not '.'. + * filename is not file separator char and the last char input directory is not '.'. * This is to support : * genrb -s. /home/icu/data * genrb -s. icu/data @@ -737,70 +737,49 @@ static const UChar* importFromDataFile(void* context, const char* locale, const * user should use * genrb -s. icu/data --- start from CWD and look in icu/data dir */ - if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){ - uprv_strcpy(openFileName, genrbdata->inputDir); - openFileName[dirlen] = U_FILE_SEP_CHAR; - } - openFileName[dirlen + 1] = '\0'; - } else { - openFileName = (char *) uprv_malloc(dirlen + filelen + 1); - - /* test for NULL */ - if(openFileName == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto finish; + openFileName.append(inputDir, dirlen, errorCode); + if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { + openFileName.append(U_FILE_SEP_CHAR, errorCode); } - - uprv_strcpy(openFileName, genrbdata->inputDir); - } } - uprv_strcat(openFileName, filename); - /* printf("%s\n", openFileName); */ - *status = U_ZERO_ERROR; - ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status); - - if(*status == U_FILE_ACCESS_ERROR) { - - fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName); - goto finish; + openFileName.append(filename, errorCode); + if(U_FAILURE(errorCode)) { + return; + } + // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data()); + const char* cp = ""; + LocalUCHARBUFPointer ucbuf( + ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode)); + if(errorCode == U_FILE_ACCESS_ERROR) { + fprintf(stderr, "couldn't open file %s\n", openFileName.data()); + return; } - if (ucbuf == NULL || U_FAILURE(*status)) { - fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status)); - goto finish; + if (ucbuf.isNull() || U_FAILURE(errorCode)) { + fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode)); + return; } /* Parse the data into an SRBRoot */ - data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status); + struct SRBRoot *data = + parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode); + if (U_FAILURE(errorCode)) { + return; + } - root = data->fRoot; - collations = resLookup(root, "collations"); + struct SResource *root = data->fRoot; + struct SResource *collations = resLookup(root, "collations"); if (collations != NULL) { - collation = resLookup(collations, type); + struct SResource *collation = resLookup(collations, collationType); if (collation != NULL) { - sequence = resLookup(collation, "Sequence"); - if (sequence != NULL) { - urules = sequence->u.fString.fChars; - urulesLength = sequence->u.fString.fLength; - *pLength = urulesLength; + struct SResource *sequence = resLookup(collation, "Sequence"); + if (sequence != NULL && sequence->isString()) { + // No string pointer aliasing so that we need not hold onto the resource bundle. + StringResource *sr = static_cast(sequence); + rules = sr->fString; } } } - -finish: - if (inputDirBuf != NULL) { - uprv_free(inputDirBuf); - } - - if (openFileName != NULL) { - uprv_free(openFileName); - } - - if(ucbuf) { - ucbuf_close(ucbuf); - } - - return urules; } // Quick-and-dirty escaping function. @@ -824,17 +803,25 @@ escape(const UChar *s, char *buffer) { } } -static struct SResource * -addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status) +} // namespace + +#endif // !UCONFIG_NO_COLLATION + +static TableResource * +addCollation(ParseState* state, TableResource *result, const char *collationType, + uint32_t startline, UErrorCode *status) { + // TODO: Use LocalPointer for result, or make caller close it when there is a failure. struct SResource *member = NULL; struct UString *tokenValue; struct UString comment; enum ETokenType token; char subtag[1024]; + UnicodeString rules; + UBool haveRules = FALSE; UVersionInfo version; uint32_t line; - GenrbData genrbdata; + /* '{' . (name resource)* '}' */ version[0]=0; version[1]=0; version[2]=0; version[3]=0; @@ -845,7 +832,7 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U if (token == TOK_CLOSE_BRACE) { - return result; + break; } if (token != TOK_STRING) @@ -880,140 +867,51 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U res_close(result); return NULL; } - - if (uprv_strcmp(subtag, "Version") == 0) + if (result == NULL) + { + // Ignore the parsed resources, continue parsing. + } + else if (uprv_strcmp(subtag, "Version") == 0 && member->isString()) { + StringResource *sr = static_cast(member); char ver[40]; - int32_t length = member->u.fString.fLength; + int32_t length = sr->length(); - if (length >= (int32_t) sizeof(ver)) + if (length >= UPRV_LENGTHOF(ver)) { - length = (int32_t) sizeof(ver) - 1; + length = UPRV_LENGTHOF(ver) - 1; } - u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ + sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV); u_versionFromString(version, ver); - table_add(result, member, line, status); - - } - else if (uprv_strcmp(subtag, "Override") == 0) - { - // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0); - table_add(result, member, line, status); - + result->add(member, line, *status); + member = NULL; } else if(uprv_strcmp(subtag, "%%CollationBin")==0) { /* discard duplicate %%CollationBin if any*/ } - else if (uprv_strcmp(subtag, "Sequence") == 0) + else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString()) { -#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO - warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); -#else - if(state->makeBinaryCollation) { - - /* do the collation elements */ - int32_t len = 0; - uint8_t *data = NULL; - UCollator *coll = NULL; - int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)]; - int32_t reorderCodeCount; - int32_t reorderCodeIndex; - UParseError parseError; - - genrbdata.inputDir = state->inputdir; - genrbdata.outputDir = state->outputdir; - - UErrorCode intStatus = U_ZERO_ERROR; - uprv_memset(&parseError, 0, sizeof(parseError)); - coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength, - UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus); - - if (U_SUCCESS(intStatus) && coll != NULL) - { - len = ucol_cloneBinary(coll, NULL, 0, &intStatus); - data = (uint8_t *)uprv_malloc(len); - intStatus = U_ZERO_ERROR; - len = ucol_cloneBinary(coll, data, len, &intStatus); - /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/ - - /* tailoring rules version */ - /* This is wrong! */ - /*coll->dataInfo.dataVersion[1] = version[0];*/ - /* Copy tailoring version. Builder version already */ - /* set in ucol_openRules */ - ((UCATableHeader *)data)->version[1] = version[0]; - ((UCATableHeader *)data)->version[2] = version[1]; - ((UCATableHeader *)data)->version[3] = version[2]; - - if (U_SUCCESS(intStatus) && data != NULL) - { - struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status); - table_add(result, collationBin, line, status); - uprv_free(data); - - reorderCodeCount = ucol_getReorderCodes( - coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus); - if (U_SUCCESS(intStatus) && reorderCodeCount > 0) { - struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status); - for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) { - intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status); - } - table_add(result, reorderCodeRes, line, status); - } - } - else - { - warning(line, "could not obtain rules from collator"); - if(isStrict()){ - *status = U_INVALID_FORMAT_ERROR; - return NULL; - } - } - - ucol_close(coll); - } - else - { - if(intStatus == U_FILE_ACCESS_ERROR) { - error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly."); - *status = intStatus; - return NULL; - } - char preBuffer[100], postBuffer[100]; - escape(parseError.preContext, preBuffer); - escape(parseError.postContext, postBuffer); - warning(line, - "%%%%CollationBin could not be constructed from CollationElements\n" - " check context, check that the FractionalUCA.txt UCA version " - "matches the current UCD version\n" - " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }", - u_errorName(intStatus), - parseError.line, - parseError.offset, - preBuffer, - postBuffer); - if(isStrict()){ - *status = intStatus; - return NULL; - } - } - } else { - if(isVerbose()) { - printf("Not building Collation binary\n"); - } - } -#endif + StringResource *sr = static_cast(member); + rules = sr->fString; + haveRules = TRUE; + // Defer building the collator until we have seen + // all sub-elements of the collation table, including the Version. /* in order to achieve smaller data files, we can direct genrb */ /* to omit collation rules */ - if(gOmitCollationRules) { - bundle_closeString(state->bundle, member); - } else { - table_add(result, member, line, status); + if(!state->omitCollationRules) { + result->add(member, line, *status); + member = NULL; } } + else // Just copy non-special items. + { + result->add(member, line, *status); + member = NULL; + } + res_close(member); // TODO: use LocalPointer if (U_FAILURE(*status)) { res_close(result); @@ -1021,17 +919,124 @@ addCollation(ParseState* state, struct SResource *result, uint32_t startline, U } } - // Reached the end without a TOK_CLOSE_BRACE. Should be an error. - *status = U_INTERNAL_PROGRAM_ERROR; - return NULL; + if (!haveRules) { return result; } + +#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO + warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); + (void)collationType; +#else + // CLDR ticket #3949, ICU ticket #8082: + // Do not build collation binary data for for-import-only "private" collation rule strings. + if (uprv_strncmp(collationType, "private-", 8) == 0) { + if(isVerbose()) { + printf("Not building %s~%s collation binary\n", state->filename, collationType); + } + return result; + } + + if(!state->makeBinaryCollation) { + if(isVerbose()) { + printf("Not building %s~%s collation binary\n", state->filename, collationType); + } + return result; + } + UErrorCode intStatus = U_ZERO_ERROR; + UParseError parseError; + uprv_memset(&parseError, 0, sizeof(parseError)); + GenrbImporter importer(state->inputdir, state->outputdir); + const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus); + if(U_FAILURE(intStatus)) { + error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus)); + res_close(result); + return NULL; // TODO: use LocalUResourceBundlePointer for result + } + icu::CollationBuilder builder(base, intStatus); + if(uprv_strncmp(collationType, "search", 6) == 0) { + builder.disableFastLatin(); // build fast-Latin table unless search collator + } + LocalPointer t( + builder.parseAndBuild(rules, version, &importer, &parseError, intStatus)); + if(U_FAILURE(intStatus)) { + const char *reason = builder.getErrorReason(); + if(reason == NULL) { reason = ""; } + error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s", + state->filename, collationType, + (long)parseError.offset, u_errorName(intStatus), reason); + if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { + // Print pre- and post-context. + char preBuffer[100], postBuffer[100]; + escape(parseError.preContext, preBuffer); + escape(parseError.postContext, postBuffer); + error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer); + } + if(isStrict() || t.isNull()) { + *status = intStatus; + res_close(result); + return NULL; + } + } + icu::LocalMemory buffer; + int32_t capacity = 100000; + uint8_t *dest = buffer.allocateInsteadAndCopy(capacity); + if(dest == NULL) { + fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", + (long)capacity); + *status = U_MEMORY_ALLOCATION_ERROR; + res_close(result); + return NULL; + } + int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1]; + int32_t totalSize = icu::CollationDataWriter::writeTailoring( + *t, *t->settings, indexes, dest, capacity, intStatus); + if(intStatus == U_BUFFER_OVERFLOW_ERROR) { + intStatus = U_ZERO_ERROR; + capacity = totalSize; + dest = buffer.allocateInsteadAndCopy(capacity); + if(dest == NULL) { + fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", + (long)capacity); + *status = U_MEMORY_ALLOCATION_ERROR; + res_close(result); + return NULL; + } + totalSize = icu::CollationDataWriter::writeTailoring( + *t, *t->settings, indexes, dest, capacity, intStatus); + } + if(U_FAILURE(intStatus)) { + fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n", + u_errorName(intStatus)); + res_close(result); + return NULL; + } + if(isVerbose()) { + printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType); + icu::CollationInfo::printSizes(totalSize, indexes); + if(t->settings->hasReordering()) { + printf("%s~%s collation reordering ranges:\n", state->filename, collationType); + icu::CollationInfo::printReorderRanges( + *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength); + } + } + struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status); + result->add(collationBin, line, *status); + if (U_FAILURE(*status)) { + res_close(result); + return NULL; + } +#endif + return result; +} + +static UBool +keepCollationType(const char * /*type*/) { + return TRUE; } static struct SResource * parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) { - struct SResource *result = NULL; + TableResource *result = NULL; struct SResource *member = NULL; - struct SResource *collationRes = NULL; struct UString *tokenValue; struct UString comment; enum ETokenType token; @@ -1048,7 +1053,7 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); } if(!newCollation) { - return addCollation(state, result, startline, status); + return addCollation(state, result, "(no type)", startline, status); } else { for(;;) { @@ -1095,7 +1100,7 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n return NULL; } - table_add(result, member, line, status); + result->add(member, line, *status); } else { @@ -1105,10 +1110,16 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n /* then, we cannot handle aliases */ if(token == TOK_OPEN_BRACE) { token = getToken(state, &tokenValue, &comment, &line, status); - collationRes = table_open(state->bundle, subtag, NULL, status); - collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */ - if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) { - table_add(result, collationRes, startline, status); + TableResource *collationRes; + if (keepCollationType(subtag)) { + collationRes = table_open(state->bundle, subtag, NULL, status); + } else { + collationRes = NULL; + } + // need to parse the collation data regardless + collationRes = addCollation(state, collationRes, subtag, startline, status); + if (collationRes != NULL) { + result->add(collationRes, startline, *status); } } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ /* we could have a table too */ @@ -1122,7 +1133,7 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n return NULL; } - table_add(result, member, line, status); + result->add(member, line, *status); } else { res_close(result); *status = U_INVALID_FORMAT_ERROR; @@ -1151,7 +1162,7 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, if this weren't special-cased, wouldn't be set until the entire file had been processed. */ static struct SResource * -realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status) +realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status) { struct SResource *member = NULL; struct UString *tokenValue=NULL; @@ -1217,7 +1228,7 @@ realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t s return NULL; } - table_add(table, member, line, status); + table->add(member, line, *status); if (U_FAILURE(*status)) { @@ -1237,8 +1248,6 @@ realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t s static struct SResource * parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) { - struct SResource *result; - if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) { return parseCollationElements(state, tag, startline, FALSE, status); @@ -1251,7 +1260,7 @@ parseTable(ParseState* state, char *tag, uint32_t startline, const struct UStrin printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); } - result = table_open(state->bundle, tag, comment, status); + TableResource *result = table_open(state->bundle, tag, comment, status); if (result == NULL || U_FAILURE(*status)) { @@ -1263,14 +1272,13 @@ parseTable(ParseState* state, char *tag, uint32_t startline, const struct UStrin static struct SResource * parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) { - struct SResource *result = NULL; struct SResource *member = NULL; struct UString *tokenValue; struct UString memberComments; enum ETokenType token; UBool readToken = FALSE; - result = array_open(state->bundle, tag, comment, status); + ArrayResource *result = array_open(state->bundle, tag, comment, status); if (result == NULL || U_FAILURE(*status)) { @@ -1326,13 +1334,7 @@ parseArray(ParseState* state, char *tag, uint32_t startline, const struct UStrin return NULL; } - array_add(result, member, status); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } + result->add(member); /* eat optional comma if present */ token = peekToken(state, 0, NULL, NULL, NULL, status); @@ -1357,7 +1359,6 @@ parseArray(ParseState* state, char *tag, uint32_t startline, const struct UStrin static struct SResource * parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) { - struct SResource *result = NULL; enum ETokenType token; char *string; int32_t value; @@ -1366,7 +1367,7 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US uint32_t len; struct UString memberComments; - result = intvector_open(state->bundle, tag, comment, status); + IntVectorResource *result = intvector_open(state->bundle, tag, comment, status); if (result == NULL || U_FAILURE(*status)) { @@ -1410,7 +1411,7 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US if(len==uprv_strlen(string)) { - intvector_add(result, value, status); + result->add(value, *status); uprv_free(string); token = peekToken(state, 0, NULL, NULL, NULL, status); } @@ -1445,28 +1446,16 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US static struct SResource * parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) { - struct SResource *result = NULL; - uint8_t *value; - char *string; - char toConv[3] = {'\0', '\0', '\0'}; - uint32_t count; - uint32_t i; - uint32_t line; - char *stopstring; - uint32_t len; - - string = getInvariantString(state, &line, NULL, status); - - if (string == NULL || U_FAILURE(*status)) + uint32_t line; + LocalMemory string(getInvariantString(state, &line, NULL, status)); + if (string.isNull() || U_FAILURE(*status)) { return NULL; } expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - if (U_FAILURE(*status)) { - uprv_free(string); return NULL; } @@ -1474,54 +1463,47 @@ parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UStri printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); } - count = (uint32_t)uprv_strlen(string); + uint32_t count = (uint32_t)uprv_strlen(string.getAlias()); if (count > 0){ if((count % 2)==0){ - value = static_cast(uprv_malloc(sizeof(uint8_t) * count)); - - if (value == NULL) + LocalMemory value; + if (value.allocateInsteadAndCopy(count) == NULL) { - uprv_free(string); *status = U_MEMORY_ALLOCATION_ERROR; return NULL; } - for (i = 0; i < count; i += 2) + char toConv[3] = {'\0', '\0', '\0'}; + for (uint32_t i = 0; i < count; i += 2) { toConv[0] = string[i]; toConv[1] = string[i + 1]; + char *stopstring; value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); - len=(uint32_t)(stopstring-toConv); + uint32_t len=(uint32_t)(stopstring-toConv); - if(len!=uprv_strlen(toConv)) + if(len!=2) { - uprv_free(string); *status=U_INVALID_CHAR_FOUND; return NULL; } } - result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status); - - uprv_free(value); + return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status); } else { *status = U_INVALID_CHAR_FOUND; - uprv_free(string); - error(line, "Encountered invalid binary string"); + error(line, "Encountered invalid binary value (length is odd)"); return NULL; } } else { - result = bin_open(state->bundle, tag, 0, NULL, "",comment,status); - warning(startline, "Encountered empty binary tag"); + warning(startline, "Encountered empty binary value"); + return bin_open(state->bundle, tag, 0, NULL, "", comment, status); } - uprv_free(string); - - return result; } static struct SResource * @@ -1577,15 +1559,8 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr static struct SResource * parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) { - struct SResource *result; - FileStream *file; - int32_t len; - uint8_t *data; - char *filename; uint32_t line; - char *fullname = NULL; - filename = getInvariantString(state, &line, NULL, status); - + LocalMemory filename(getInvariantString(state, &line, NULL, status)); if (U_FAILURE(*status)) { return NULL; @@ -1595,7 +1570,6 @@ parseImport(ParseState* state, char *tag, uint32_t startline, const struct UStri if (U_FAILURE(*status)) { - uprv_free(filename); return NULL; } @@ -1604,90 +1578,36 @@ parseImport(ParseState* state, char *tag, uint32_t startline, const struct UStri } /* Open the input file for reading */ - if (state->inputdir == NULL) - { -#if 1 - /* - * Always save file file name, even if there's - * no input directory specified. MIGHT BREAK SOMETHING - */ - int32_t filenameLength = uprv_strlen(filename); - - fullname = (char *) uprv_malloc(filenameLength + 1); - uprv_strcpy(fullname, filename); -#endif - - file = T_FileStream_open(filename, "rb"); + CharString fullname; + if (state->inputdir != NULL) { + fullname.append(state->inputdir, *status); } - else - { - - int32_t count = (int32_t)uprv_strlen(filename); - - if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) - { - fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); - - /* test for NULL */ - if(fullname == NULL) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - uprv_strcpy(fullname, state->inputdir); - - fullname[state->inputdirLength] = U_FILE_SEP_CHAR; - fullname[state->inputdirLength + 1] = '\0'; - - uprv_strcat(fullname, filename); - } - else - { - fullname = (char *) uprv_malloc(state->inputdirLength + count + 1); - - /* test for NULL */ - if(fullname == NULL) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - uprv_strcpy(fullname, state->inputdir); - uprv_strcat(fullname, filename); - } - - file = T_FileStream_open(fullname, "rb"); - + fullname.appendPathPart(filename.getAlias(), *status); + if (U_FAILURE(*status)) { + return NULL; } + FileStream *file = T_FileStream_open(fullname.data(), "rb"); if (file == NULL) { - error(line, "couldn't open input file %s", filename); + error(line, "couldn't open input file %s", filename.getAlias()); *status = U_FILE_ACCESS_ERROR; return NULL; } - len = T_FileStream_size(file); - data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t)); - /* test for NULL */ - if(data == NULL) + int32_t len = T_FileStream_size(file); + LocalMemory data; + if(data.allocateInsteadAndCopy(len) == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; T_FileStream_close (file); return NULL; } - /* int32_t numRead = */ T_FileStream_read (file, data, len); + /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len); T_FileStream_close (file); - result = bin_open(state->bundle, tag, len, data, fullname, comment, status); - - uprv_free(data); - uprv_free(filename); - uprv_free(fullname); - - return result; + return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status); } static struct SResource * @@ -1799,22 +1719,22 @@ U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); typedef enum EResourceType { - RT_UNKNOWN, - RT_STRING, - RT_BINARY, - RT_TABLE, - RT_TABLE_NO_FALLBACK, - RT_INTEGER, - RT_ARRAY, - RT_ALIAS, - RT_INTVECTOR, - RT_IMPORT, - RT_INCLUDE, - RT_PROCESS_UCA_RULES, - RT_PROCESS_COLLATION, - RT_PROCESS_TRANSLITERATOR, - RT_PROCESS_DEPENDENCY, - RT_RESERVED + RESTYPE_UNKNOWN, + RESTYPE_STRING, + RESTYPE_BINARY, + RESTYPE_TABLE, + RESTYPE_TABLE_NO_FALLBACK, + RESTYPE_INTEGER, + RESTYPE_ARRAY, + RESTYPE_ALIAS, + RESTYPE_INTVECTOR, + RESTYPE_IMPORT, + RESTYPE_INCLUDE, + RESTYPE_PROCESS_UCA_RULES, + RESTYPE_PROCESS_COLLATION, + RESTYPE_PROCESS_TRANSLITERATOR, + RESTYPE_PROCESS_DEPENDENCY, + RESTYPE_RESERVED } EResourceType; static struct { @@ -1840,7 +1760,7 @@ static struct { {"reserved", NULL, NULL} }; -void initParser(UBool omitCollationRules) +void initParser() { U_STRING_INIT(k_type_string, "string", 6); U_STRING_INIT(k_type_binary, "binary", 6); @@ -1859,12 +1779,10 @@ void initParser(UBool omitCollationRules) U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); - - gOmitCollationRules = omitCollationRules; } static inline UBool isTable(enum EResourceType type) { - return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); + return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK); } static enum EResourceType @@ -1872,33 +1790,33 @@ parseResourceType(ParseState* state, UErrorCode *status) { struct UString *tokenValue; struct UString comment; - enum EResourceType result = RT_UNKNOWN; + enum EResourceType result = RESTYPE_UNKNOWN; uint32_t line=0; ustr_init(&comment); expect(state, TOK_STRING, &tokenValue, &comment, &line, status); if (U_FAILURE(*status)) { - return RT_UNKNOWN; + return RESTYPE_UNKNOWN; } *status = U_ZERO_ERROR; /* Search for normal types */ - result=RT_UNKNOWN; - while ((result=(EResourceType)(result+1)) < RT_RESERVED) { + result=RESTYPE_UNKNOWN; + while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) { if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { break; } } /* Now search for the aliases */ if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { - result = RT_INTEGER; + result = RESTYPE_INTEGER; } else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { - result = RT_BINARY; + result = RESTYPE_BINARY; } - else if (result == RT_RESERVED) { + else if (result == RESTYPE_RESERVED) { char tokenBuffer[1024]; u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); tokenBuffer[sizeof(tokenBuffer) - 1] = 0; @@ -1914,7 +1832,7 @@ static struct SResource * parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) { enum ETokenType token; - enum EResourceType resType = RT_UNKNOWN; + enum EResourceType resType = RESTYPE_UNKNOWN; ParseResourceFunction *parseFunction = NULL; struct UString *tokenValue; uint32_t startline; @@ -1963,7 +1881,7 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro } - if (resType == RT_UNKNOWN) + if (resType == RESTYPE_UNKNOWN) { /* No explicit type, so try to work it out. At this point, we've read the first '{'. We could have any of the following: @@ -1986,7 +1904,7 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) { - resType = RT_ARRAY; + resType = RESTYPE_ARRAY; } else if (token == TOK_STRING) { @@ -1999,10 +1917,10 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro switch (token) { - case TOK_COMMA: resType = RT_ARRAY; break; - case TOK_OPEN_BRACE: resType = RT_TABLE; break; - case TOK_CLOSE_BRACE: resType = RT_STRING; break; - case TOK_COLON: resType = RT_TABLE; break; + case TOK_COMMA: resType = RESTYPE_ARRAY; break; + case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break; + case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break; + case TOK_COLON: resType = RESTYPE_TABLE; break; default: *status = U_INVALID_FORMAT_ERROR; error(line, "Unexpected token after string, expected ',', '{' or '}'"); @@ -2017,7 +1935,7 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro } /* printf("Type guessed as %s\n", resourceNames[resType]); */ - } else if(resType == RT_TABLE_NO_FALLBACK) { + } else if(resType == RESTYPE_TABLE_NO_FALLBACK) { *status = U_INVALID_FORMAT_ERROR; error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); return NULL; @@ -2040,8 +1958,8 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro /* parse the top-level resource */ struct SRBRoot * -parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation, - UErrorCode *status) +parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename, + UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status) { struct UString *tokenValue; struct UString comment; @@ -2064,12 +1982,14 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBina state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; state.outputdir = outputDir; state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; + state.filename = filename; state.makeBinaryCollation = makeBinaryCollation; + state.omitCollationRules = omitCollationRules; ustr_init(&comment); expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); - state.bundle = bundle_open(&comment, FALSE, status); + state.bundle = new SRBRoot(&comment, FALSE, *status); if (state.bundle == NULL || U_FAILURE(*status)) { @@ -2077,7 +1997,7 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBina } - bundle_setlocale(state.bundle, tokenValue->fChars, status); + state.bundle->setLocale(tokenValue->fChars, *status); /* The following code is to make Empty bundle work no matter with :table specifer or not */ token = getToken(&state, NULL, NULL, &line, status); @@ -2101,40 +2021,43 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBina if(token==TOK_OPEN_BRACE) { *status=U_ZERO_ERROR; - bundleType=RT_TABLE; + bundleType=RESTYPE_TABLE; } else { /* neither colon nor open brace */ *status=U_PARSE_ERROR; - bundleType=RT_UNKNOWN; + bundleType=RESTYPE_UNKNOWN; error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); } } if (U_FAILURE(*status)) { - bundle_close(state.bundle, status); + delete state.bundle; return NULL; } - if(bundleType==RT_TABLE_NO_FALLBACK) { + if(bundleType==RESTYPE_TABLE_NO_FALLBACK) { /* * Parse a top-level table with the table(nofallback) declaration. * This is the same as a regular table, but also sets the * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . */ - state.bundle->noFallback=TRUE; + state.bundle->fNoFallback=TRUE; } /* top-level tables need not handle special table names like "collations" */ - realParseTable(&state, state.bundle->fRoot, NULL, line, status); + assert(!state.bundle->fIsPoolBundle); + assert(state.bundle->fRoot->fType == URES_TABLE); + TableResource *rootTable = static_cast(state.bundle->fRoot); + realParseTable(&state, rootTable, NULL, line, status); if(dependencyArray!=NULL){ - table_add(state.bundle->fRoot, dependencyArray, 0, status); + rootTable->add(dependencyArray, 0, *status); dependencyArray = NULL; } if (U_FAILURE(*status)) { - bundle_close(state.bundle, status); + delete state.bundle; res_close(dependencyArray); return NULL; }