/*
*******************************************************************************
*
-* Copyright (C) 1998-2012, International Business Machines
+* Copyright (C) 1998-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*******************************************************************************
*/
-#include "ucol_imp.h"
+// Safer use of UnicodeString.
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+#endif
+
+// Less important, but still a good idea.
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+#endif
+
+#include <assert.h>
#include "parse.h"
#include "errmsg.h"
#include "uhash.h"
#include "reslist.h"
#include "rbt_pars.h"
#include "genrb.h"
+#include "unicode/stringpiece.h"
+#include "unicode/unistr.h"
#include "unicode/ustring.h"
#include "unicode/uscript.h"
+#include "unicode/utf16.h"
#include "unicode/putil.h"
+#include "charstr.h"
+#include "collationbuilder.h"
+#include "collationdata.h"
+#include "collationdatareader.h"
+#include "collationdatawriter.h"
+#include "collationfastlatinbuilder.h"
+#include "collationinfo.h"
+#include "collationroot.h"
+#include "collationruleparser.h"
+#include "collationtailoring.h"
#include <stdio.h>
/* Number of tokens to read ahead of the current stream position */
#define OPENSQBRACKET 0x005B
#define CLOSESQBRACKET 0x005D
+using icu::CharString;
+using icu::LocalMemory;
+using icu::LocalPointer;
+using icu::LocalUCHARBUFPointer;
+using icu::StringPiece;
+using icu::UnicodeString;
+
struct Lookahead
{
enum ETokenType type;
};
/* Just to store "TRUE" */
-static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
+//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
typedef struct {
struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
uint32_t inputdirLength;
const char *outputdir;
uint32_t outputdirLength;
+ const char *filename;
UBool makeBinaryCollation;
+ UBool omitCollationRules;
} ParseState;
-static UBool gOmitCollationRules = FALSE;
-
typedef struct SResource *
ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
cleanupLookahead(ParseState* state)
{
uint32_t i;
- for (i = 0; i < MAX_LOOKAHEAD; i++)
+ for (i = 0; i <= MAX_LOOKAHEAD; i++)
{
ustr_deinit(&state->lookahead[i].value);
ustr_deinit(&state->lookahead[i].comment);
return NULL;
}
- result = reinterpret_cast<char *>(uprv_malloc(count+1));
+ result = static_cast<char *>(uprv_malloc(count+1));
if (result == NULL)
{
char filename[256] = { '\0' };
char cs[128] = { '\0' };
uint32_t line;
- int len=0;
UBool quoted = FALSE;
UCHARBUF *ucbuf=NULL;
UChar32 c = 0;
}
uprv_strcat(filename, cs);
- if(gOmitCollationRules) {
+ if(state->omitCollationRules) {
return res_none();
}
* append at the end of the loop
*/
while(c != ENDCOMMAND) {
- U_APPEND_CHAR32(c, target,len);
+ U_APPEND_CHAR32_ONLY(c, target);
c = ucbuf_getc(ucbuf, status);
}
}
/* Append UChar * after dissembling if c > 0xffff*/
if (c != (UChar32)U_EOF)
{
- U_APPEND_CHAR32(c, target,len);
+ U_APPEND_CHAR32_ONLY(c, target);
}
else
{
return result;
}
-static struct SResource* dependencyArray = NULL;
+static ArrayResource* dependencyArray = NULL;
static struct SResource *
parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
}
elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
- array_add(dependencyArray, elem, status);
+ dependencyArray->add(elem);
if (U_FAILURE(*status))
{
return result;
}
-typedef struct{
- const char* inputDir;
- const char* outputDir;
-} GenrbData;
+#if !UCONFIG_NO_COLLATION
+
+namespace {
static struct SResource* resLookup(struct SResource* res, const char* key){
- struct SResource *current = NULL;
- struct SResTable *list;
- if (res == res_none()) {
+ if (res == res_none() || !res->isTable()) {
return NULL;
}
- list = &(res->u.fTable);
-
- current = list->fFirst;
+ TableResource *list = static_cast<TableResource *>(res);
+ SResource *current = list->fFirst;
while (current != NULL) {
if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
return current;
return NULL;
}
-static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
- struct SRBRoot *data = NULL;
- UCHARBUF *ucbuf = NULL;
- GenrbData* genrbdata = (GenrbData*) context;
- int localeLength = strlen(locale);
- char* filename = (char*)uprv_malloc(localeLength+5);
- char *inputDirBuf = NULL;
- char *openFileName = NULL;
- const char* cp = "";
- UChar* urules = NULL;
- int32_t urulesLength = 0;
- int32_t i = 0;
- int32_t dirlen = 0;
- int32_t filelen = 0;
- struct SResource* root;
- struct SResource* collations;
- struct SResource* collation;
- struct SResource* sequence;
-
- memcpy(filename, locale, localeLength);
- for(i = 0; i < localeLength; i++){
- if(filename[i] == '-'){
- filename[i] = '_';
- }
- }
- filename[localeLength] = '.';
- filename[localeLength+1] = 't';
- filename[localeLength+2] = 'x';
- filename[localeLength+3] = 't';
- filename[localeLength+4] = 0;
+class GenrbImporter : public icu::CollationRuleParser::Importer { // Importer that reads imported collation rules from genrb source (.txt) files.
+public:
+ GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {} // Stores both pointers as-is (no copy); the strings must outlive this object.
+ virtual ~GenrbImporter();
+ virtual void getRules( // Copies the "Sequence" rule string of localeID/collationType into rules.
+ const char *localeID, const char *collationType,
+ UnicodeString &rules,
+ const char *&errorReason, UErrorCode &errorCode);
+
+private:
+ const char *inputDir; // Directory searched for "<localeID>.txt"; may be NULL.
+ const char *outputDir; // Passed through unchanged to parse().
+};
+GenrbImporter::~GenrbImporter() {} // Nothing to release: the object owns neither string.
- if (status==NULL || U_FAILURE(*status)) {
- return NULL;
+// Loads the collation rule string for localeID/collationType from the genrb
+// source file "<localeID>.txt" ('-' in localeID is replaced with '_').
+//
+// The file is looked up relative to the importer's input directory, parsed with
+// parse(), and the string resource collations/<collationType>/Sequence, if it
+// exists, is copied into rules. rules is left unchanged when that resource is
+// missing or is not a string.
+//
+// The errorReason parameter is unused. The function returns early, without
+// touching rules, when errorCode reports a failure or the file cannot be opened.
+void
+GenrbImporter::getRules(
+ const char *localeID, const char *collationType,
+ UnicodeString &rules,
+ const char *& /*errorReason*/, UErrorCode &errorCode) {
+ CharString filename(localeID, errorCode);
+ for(int32_t i = 0; i < filename.length(); i++){
+ if(filename[i] == '-'){
+ filename.data()[i] = '_';
+ }
}
- if(filename==NULL){
- *status=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }else{
- filelen = (int32_t)uprv_strlen(filename);
+ filename.append(".txt", errorCode);
+ if (U_FAILURE(errorCode)) {
+ return;
}
- if(genrbdata->inputDir == NULL) {
- const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
- openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
- openFileName[0] = '\0';
+ CharString inputDirBuf;
+ CharString openFileName;
+ // Work on a local copy of the directory pointer. The directory derived below
+ // lives in the local inputDirBuf, so it must never be stored in the member
+ // inputDir: that pointer would dangle as soon as this function returns and
+ // the next import would read freed memory.
+ const char *dir = inputDir;
+ if(dir == NULL) {
+ const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
if (filenameBegin != NULL) {
/*
* When a filename ../../../data/root.txt is specified,
* This is very important when the resource file includes
* another file, like UCARules.txt or thaidict.brk.
*/
- int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
- inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
-
- /* test for NULL */
- if(inputDirBuf == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto finish;
- }
-
- inputDirBuf[filenameSize - 1] = 0;
- genrbdata->inputDir = inputDirBuf;
- dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
+ // Everything before the last file separator is the directory part.
+ StringPiece dirPiece = filename.toStringPiece();
+ const char *filenameLimit = filename.data() + filename.length();
+ dirPiece.remove_suffix((int32_t)(filenameLimit - filenameBegin));
+ inputDirBuf.append(dirPiece, errorCode);
+ dir = inputDirBuf.data();
}
}else{
- dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
+ int32_t dirlen = (int32_t)uprv_strlen(dir);
- if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
- openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
-
- /* test for NULL */
- if(openFileName == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto finish;
- }
-
- openFileName[0] = '\0';
+ // dirlen > 0 guards the dir[dirlen-1] reads against an empty directory string.
+ if((dirlen > 0) && (filename[0] != U_FILE_SEP_CHAR) && (dir[dirlen-1] !='.')) {
/*
* append the input dir to openFileName if the first char in
- * filename is not file seperation char and the last char input directory is not '.'.
+ * filename is not file separator char and the last char input directory is not '.'.
* This is to support :
* genrb -s. /home/icu/data
* genrb -s. icu/data
* user should use
* genrb -s. icu/data --- start from CWD and look in icu/data dir
*/
- if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
- uprv_strcpy(openFileName, genrbdata->inputDir);
- openFileName[dirlen] = U_FILE_SEP_CHAR;
- }
- openFileName[dirlen + 1] = '\0';
- } else {
- openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
-
- /* test for NULL */
- if(openFileName == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto finish;
+ openFileName.append(dir, dirlen, errorCode);
+ if(dir[dirlen-1] != U_FILE_SEP_CHAR) {
+ openFileName.append(U_FILE_SEP_CHAR, errorCode);
}
-
- uprv_strcpy(openFileName, genrbdata->inputDir);
-
}
}
- uprv_strcat(openFileName, filename);
- /* printf("%s\n", openFileName); */
- *status = U_ZERO_ERROR;
- ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
-
- if(*status == U_FILE_ACCESS_ERROR) {
-
- fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
- goto finish;
+ openFileName.append(filename, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
+ const char* cp = "";
+ // ucbuf is closed automatically when it goes out of scope.
+ LocalUCHARBUFPointer ucbuf(
+ ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
+ if(errorCode == U_FILE_ACCESS_ERROR) {
+ fprintf(stderr, "couldn't open file %s\n", openFileName.data());
+ return;
}
- if (ucbuf == NULL || U_FAILURE(*status)) {
- fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
- goto finish;
+ if (ucbuf.isNull() || U_FAILURE(errorCode)) {
+ fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
+ return;
}
/* Parse the data into an SRBRoot */
- data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status);
+ // NOTE(review): nothing in this function releases data; confirm who owns the parsed bundle.
+ struct SRBRoot *data =
+ parse(ucbuf.getAlias(), dir, outputDir, filename.data(), FALSE, FALSE, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
- root = data->fRoot;
- collations = resLookup(root, "collations");
+ struct SResource *root = data->fRoot;
+ struct SResource *collations = resLookup(root, "collations");
if (collations != NULL) {
- collation = resLookup(collations, type);
+ struct SResource *collation = resLookup(collations, collationType);
if (collation != NULL) {
- sequence = resLookup(collation, "Sequence");
- if (sequence != NULL) {
- urules = sequence->u.fString.fChars;
- urulesLength = sequence->u.fString.fLength;
- *pLength = urulesLength;
+ struct SResource *sequence = resLookup(collation, "Sequence");
+ if (sequence != NULL && sequence->isString()) {
+ // No string pointer aliasing so that we need not hold onto the resource bundle.
+ StringResource *sr = static_cast<StringResource *>(sequence);
+ rules = sr->fString;
}
}
}
-
-finish:
- if (inputDirBuf != NULL) {
- uprv_free(inputDirBuf);
- }
-
- if (openFileName != NULL) {
- uprv_free(openFileName);
- }
-
- if(ucbuf) {
- ucbuf_close(ucbuf);
- }
-
- return urules;
}
// Quick-and-dirty escaping function.
}
}
-static struct SResource *
-addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
+} // namespace
+
+#endif // !UCONFIG_NO_COLLATION
+
+static TableResource *
+addCollation(ParseState* state, TableResource *result, const char *collationType,
+ uint32_t startline, UErrorCode *status)
{
+ // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
struct SResource *member = NULL;
struct UString *tokenValue;
struct UString comment;
enum ETokenType token;
char subtag[1024];
+ UnicodeString rules;
+ UBool haveRules = FALSE;
UVersionInfo version;
uint32_t line;
- GenrbData genrbdata;
+
/* '{' . (name resource)* '}' */
version[0]=0; version[1]=0; version[2]=0; version[3]=0;
if (token == TOK_CLOSE_BRACE)
{
- return result;
+ break;
}
if (token != TOK_STRING)
res_close(result);
return NULL;
}
-
- if (uprv_strcmp(subtag, "Version") == 0)
+ if (result == NULL)
{
+ // Ignore the parsed resources, continue parsing.
+ }
+ else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
+ {
+ StringResource *sr = static_cast<StringResource *>(member);
char ver[40];
- int32_t length = member->u.fString.fLength;
+ int32_t length = sr->length();
- if (length >= (int32_t) sizeof(ver))
+ if (length >= UPRV_LENGTHOF(ver))
{
- length = (int32_t) sizeof(ver) - 1;
+ length = UPRV_LENGTHOF(ver) - 1;
}
- u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
+ sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
u_versionFromString(version, ver);
- table_add(result, member, line, status);
-
- }
- else if (uprv_strcmp(subtag, "Override") == 0)
- {
- // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
- table_add(result, member, line, status);
-
+ result->add(member, line, *status);
+ member = NULL;
}
else if(uprv_strcmp(subtag, "%%CollationBin")==0)
{
/* discard duplicate %%CollationBin if any*/
}
- else if (uprv_strcmp(subtag, "Sequence") == 0)
+ else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
{
-#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
- warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
-#else
- if(state->makeBinaryCollation) {
-
- /* do the collation elements */
- int32_t len = 0;
- uint8_t *data = NULL;
- UCollator *coll = NULL;
- int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
- int32_t reorderCodeCount;
- int32_t reorderCodeIndex;
- UParseError parseError;
-
- genrbdata.inputDir = state->inputdir;
- genrbdata.outputDir = state->outputdir;
-
- UErrorCode intStatus = U_ZERO_ERROR;
- uprv_memset(&parseError, 0, sizeof(parseError));
- coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
- UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
-
- if (U_SUCCESS(intStatus) && coll != NULL)
- {
- len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
- data = (uint8_t *)uprv_malloc(len);
- intStatus = U_ZERO_ERROR;
- len = ucol_cloneBinary(coll, data, len, &intStatus);
- /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
-
- /* tailoring rules version */
- /* This is wrong! */
- /*coll->dataInfo.dataVersion[1] = version[0];*/
- /* Copy tailoring version. Builder version already */
- /* set in ucol_openRules */
- ((UCATableHeader *)data)->version[1] = version[0];
- ((UCATableHeader *)data)->version[2] = version[1];
- ((UCATableHeader *)data)->version[3] = version[2];
-
- if (U_SUCCESS(intStatus) && data != NULL)
- {
- struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
- table_add(result, collationBin, line, status);
- uprv_free(data);
-
- reorderCodeCount = ucol_getReorderCodes(
- coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
- if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
- struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
- for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
- intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
- }
- table_add(result, reorderCodeRes, line, status);
- }
- }
- else
- {
- warning(line, "could not obtain rules from collator");
- if(isStrict()){
- *status = U_INVALID_FORMAT_ERROR;
- return NULL;
- }
- }
-
- ucol_close(coll);
- }
- else
- {
- if(intStatus == U_FILE_ACCESS_ERROR) {
- error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
- *status = intStatus;
- return NULL;
- }
- char preBuffer[100], postBuffer[100];
- escape(parseError.preContext, preBuffer);
- escape(parseError.postContext, postBuffer);
- warning(line,
- "%%%%CollationBin could not be constructed from CollationElements\n"
- " check context, check that the FractionalUCA.txt UCA version "
- "matches the current UCD version\n"
- " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
- u_errorName(intStatus),
- parseError.line,
- parseError.offset,
- preBuffer,
- postBuffer);
- if(isStrict()){
- *status = intStatus;
- return NULL;
- }
- }
- } else {
- if(isVerbose()) {
- printf("Not building Collation binary\n");
- }
- }
-#endif
+ StringResource *sr = static_cast<StringResource *>(member);
+ rules = sr->fString;
+ haveRules = TRUE;
+ // Defer building the collator until we have seen
+ // all sub-elements of the collation table, including the Version.
/* in order to achieve smaller data files, we can direct genrb */
/* to omit collation rules */
- if(gOmitCollationRules) {
- bundle_closeString(state->bundle, member);
- } else {
- table_add(result, member, line, status);
+ if(!state->omitCollationRules) {
+ result->add(member, line, *status);
+ member = NULL;
}
}
+ else // Just copy non-special items.
+ {
+ result->add(member, line, *status);
+ member = NULL;
+ }
+ res_close(member); // TODO: use LocalPointer
if (U_FAILURE(*status))
{
res_close(result);
}
}
- // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
- *status = U_INTERNAL_PROGRAM_ERROR;
- return NULL;
+ if (!haveRules) { return result; }
+
+#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
+ warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
+ (void)collationType;
+#else
+ // CLDR ticket #3949, ICU ticket #8082:
+ // Do not build collation binary data for for-import-only "private" collation rule strings.
+ if (uprv_strncmp(collationType, "private-", 8) == 0) {
+ if(isVerbose()) {
+ printf("Not building %s~%s collation binary\n", state->filename, collationType);
+ }
+ return result;
+ }
+
+ if(!state->makeBinaryCollation) {
+ if(isVerbose()) {
+ printf("Not building %s~%s collation binary\n", state->filename, collationType);
+ }
+ return result;
+ }
+ UErrorCode intStatus = U_ZERO_ERROR;
+ UParseError parseError;
+ uprv_memset(&parseError, 0, sizeof(parseError));
+ GenrbImporter importer(state->inputdir, state->outputdir);
+ const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
+ if(U_FAILURE(intStatus)) {
+ error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
+ res_close(result);
+ return NULL; // TODO: use LocalUResourceBundlePointer for result
+ }
+ icu::CollationBuilder builder(base, intStatus);
+ if(uprv_strncmp(collationType, "search", 6) == 0) {
+ builder.disableFastLatin(); // build fast-Latin table unless search collator
+ }
+ LocalPointer<icu::CollationTailoring> t(
+ builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
+ if(U_FAILURE(intStatus)) {
+ const char *reason = builder.getErrorReason();
+ if(reason == NULL) { reason = ""; }
+ error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
+ state->filename, collationType,
+ (long)parseError.offset, u_errorName(intStatus), reason);
+ if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
+ // Print pre- and post-context.
+ char preBuffer[100], postBuffer[100];
+ escape(parseError.preContext, preBuffer);
+ escape(parseError.postContext, postBuffer);
+ error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
+ }
+ if(isStrict() || t.isNull()) {
+ *status = intStatus;
+ res_close(result);
+ return NULL;
+ }
+ }
+ icu::LocalMemory<uint8_t> buffer;
+ int32_t capacity = 100000;
+ uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
+ if(dest == NULL) {
+ fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
+ (long)capacity);
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ res_close(result);
+ return NULL;
+ }
+ int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
+ int32_t totalSize = icu::CollationDataWriter::writeTailoring(
+ *t, *t->settings, indexes, dest, capacity, intStatus);
+ if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
+ intStatus = U_ZERO_ERROR;
+ capacity = totalSize;
+ dest = buffer.allocateInsteadAndCopy(capacity);
+ if(dest == NULL) {
+ fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
+ (long)capacity);
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ res_close(result);
+ return NULL;
+ }
+ totalSize = icu::CollationDataWriter::writeTailoring(
+ *t, *t->settings, indexes, dest, capacity, intStatus);
+ }
+ if(U_FAILURE(intStatus)) {
+ fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
+ u_errorName(intStatus));
+ res_close(result);
+ return NULL;
+ }
+ if(isVerbose()) {
+ printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
+ icu::CollationInfo::printSizes(totalSize, indexes);
+ if(t->settings->hasReordering()) {
+ printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
+ icu::CollationInfo::printReorderRanges(
+ *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
+ }
+ }
+ struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
+ result->add(collationBin, line, *status);
+ if (U_FAILURE(*status)) {
+ res_close(result);
+ return NULL;
+ }
+#endif
+ return result;
+}
+
+static UBool
+keepCollationType(const char * /*type*/) { // Filter hook for collation types; the type name is currently ignored.
+ return TRUE; // Every collation type is kept.
}
static struct SResource *
parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
{
- struct SResource *result = NULL;
+ TableResource *result = NULL;
struct SResource *member = NULL;
- struct SResource *collationRes = NULL;
struct UString *tokenValue;
struct UString comment;
enum ETokenType token;
printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
if(!newCollation) {
- return addCollation(state, result, startline, status);
+ return addCollation(state, result, "(no type)", startline, status);
}
else {
for(;;) {
return NULL;
}
- table_add(result, member, line, status);
+ result->add(member, line, *status);
}
else
{
/* then, we cannot handle aliases */
if(token == TOK_OPEN_BRACE) {
token = getToken(state, &tokenValue, &comment, &line, status);
- collationRes = table_open(state->bundle, subtag, NULL, status);
- collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
- if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
- table_add(result, collationRes, startline, status);
+ TableResource *collationRes;
+ if (keepCollationType(subtag)) {
+ collationRes = table_open(state->bundle, subtag, NULL, status);
+ } else {
+ collationRes = NULL;
+ }
+ // need to parse the collation data regardless
+ collationRes = addCollation(state, collationRes, subtag, startline, status);
+ if (collationRes != NULL) {
+ result->add(collationRes, startline, *status);
}
} else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
/* we could have a table too */
return NULL;
}
- table_add(result, member, line, status);
+ result->add(member, line, *status);
} else {
res_close(result);
*status = U_INVALID_FORMAT_ERROR;
/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
if this weren't special-cased, wouldn't be set until the entire file had been processed. */
static struct SResource *
-realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
+realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
{
struct SResource *member = NULL;
struct UString *tokenValue=NULL;
return NULL;
}
- table_add(table, member, line, status);
+ table->add(member, line, *status);
if (U_FAILURE(*status))
{
static struct SResource *
parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
- struct SResource *result;
-
if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
{
return parseCollationElements(state, tag, startline, FALSE, status);
printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- result = table_open(state->bundle, tag, comment, status);
+ TableResource *result = table_open(state->bundle, tag, comment, status);
if (result == NULL || U_FAILURE(*status))
{
static struct SResource *
parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
- struct SResource *result = NULL;
struct SResource *member = NULL;
struct UString *tokenValue;
struct UString memberComments;
enum ETokenType token;
UBool readToken = FALSE;
- result = array_open(state->bundle, tag, comment, status);
+ ArrayResource *result = array_open(state->bundle, tag, comment, status);
if (result == NULL || U_FAILURE(*status))
{
return NULL;
}
- array_add(result, member, status);
-
- if (U_FAILURE(*status))
- {
- res_close(result);
- return NULL;
- }
+ result->add(member);
/* eat optional comma if present */
token = peekToken(state, 0, NULL, NULL, NULL, status);
static struct SResource *
parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
- struct SResource *result = NULL;
enum ETokenType token;
char *string;
int32_t value;
uint32_t len;
struct UString memberComments;
- result = intvector_open(state->bundle, tag, comment, status);
+ IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
if (result == NULL || U_FAILURE(*status))
{
if(len==uprv_strlen(string))
{
- intvector_add(result, value, status);
+ result->add(value, *status);
uprv_free(string);
token = peekToken(state, 0, NULL, NULL, NULL, status);
}
static struct SResource *
parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
- struct SResource *result = NULL;
- uint8_t *value;
- char *string;
- char toConv[3] = {'\0', '\0', '\0'};
- uint32_t count;
- uint32_t i;
- uint32_t line;
- char *stopstring;
- uint32_t len;
-
- string = getInvariantString(state, &line, NULL, status);
-
- if (string == NULL || U_FAILURE(*status))
+ uint32_t line;
+ LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
+ if (string.isNull() || U_FAILURE(*status))
{
return NULL;
}
expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
-
if (U_FAILURE(*status))
{
- uprv_free(string);
return NULL;
}
printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- count = (uint32_t)uprv_strlen(string);
+ uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
if (count > 0){
if((count % 2)==0){
- value = reinterpret_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
-
- if (value == NULL)
+ LocalMemory<uint8_t> value;
+ if (value.allocateInsteadAndCopy(count) == NULL)
{
- uprv_free(string);
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- for (i = 0; i < count; i += 2)
+ char toConv[3] = {'\0', '\0', '\0'};
+ for (uint32_t i = 0; i < count; i += 2)
{
toConv[0] = string[i];
toConv[1] = string[i + 1];
+ char *stopstring;
value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
- len=(uint32_t)(stopstring-toConv);
+ uint32_t len=(uint32_t)(stopstring-toConv);
- if(len!=uprv_strlen(toConv))
+ if(len!=2)
{
- uprv_free(string);
*status=U_INVALID_CHAR_FOUND;
return NULL;
}
}
- result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
-
- uprv_free(value);
+ return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
}
else
{
*status = U_INVALID_CHAR_FOUND;
- uprv_free(string);
- error(line, "Encountered invalid binary string");
+ error(line, "Encountered invalid binary value (length is odd)");
return NULL;
}
}
else
{
- result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
- warning(startline, "Encountered empty binary tag");
+ warning(startline, "Encountered empty binary value");
+ return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
}
- uprv_free(string);
-
- return result;
}
static struct SResource *
static struct SResource *
parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
- struct SResource *result;
- FileStream *file;
- int32_t len;
- uint8_t *data;
- char *filename;
uint32_t line;
- char *fullname = NULL;
- filename = getInvariantString(state, &line, NULL, status);
-
+ LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
if (U_FAILURE(*status))
{
return NULL;
if (U_FAILURE(*status))
{
- uprv_free(filename);
return NULL;
}
}
/* Open the input file for reading */
- if (state->inputdir == NULL)
- {
-#if 1
- /*
- * Always save file file name, even if there's
- * no input directory specified. MIGHT BREAK SOMETHING
- */
- int32_t filenameLength = uprv_strlen(filename);
-
- fullname = (char *) uprv_malloc(filenameLength + 1);
- uprv_strcpy(fullname, filename);
-#endif
-
- file = T_FileStream_open(filename, "rb");
+ CharString fullname;
+ if (state->inputdir != NULL) {
+ fullname.append(state->inputdir, *status);
}
- else
- {
-
- int32_t count = (int32_t)uprv_strlen(filename);
-
- if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
- {
- fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
-
- /* test for NULL */
- if(fullname == NULL)
- {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- uprv_strcpy(fullname, state->inputdir);
-
- fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
- fullname[state->inputdirLength + 1] = '\0';
-
- uprv_strcat(fullname, filename);
- }
- else
- {
- fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
-
- /* test for NULL */
- if(fullname == NULL)
- {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- uprv_strcpy(fullname, state->inputdir);
- uprv_strcat(fullname, filename);
- }
-
- file = T_FileStream_open(fullname, "rb");
-
+ fullname.appendPathPart(filename.getAlias(), *status);
+ if (U_FAILURE(*status)) {
+ return NULL;
}
+ FileStream *file = T_FileStream_open(fullname.data(), "rb");
if (file == NULL)
{
- error(line, "couldn't open input file %s", filename);
+ error(line, "couldn't open input file %s", filename.getAlias());
*status = U_FILE_ACCESS_ERROR;
return NULL;
}
- len = T_FileStream_size(file);
- data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
- /* test for NULL */
- if(data == NULL)
+ int32_t len = T_FileStream_size(file);
+ LocalMemory<uint8_t> data;
+ if(data.allocateInsteadAndCopy(len) == NULL)
{
*status = U_MEMORY_ALLOCATION_ERROR;
T_FileStream_close (file);
return NULL;
}
- /* int32_t numRead = */ T_FileStream_read (file, data, len);
+ /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
T_FileStream_close (file);
- result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
-
- uprv_free(data);
- uprv_free(filename);
- uprv_free(fullname);
-
- return result;
+ return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
}
static struct SResource *
uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
result = string_open(state->bundle, tag, uBuffer, len, comment, status);
+ ucbuf_close(ucbuf);
+
uprv_free(pTarget);
uprv_free(filename);
/*
 * UChar string constants for the resource type keywords. The third argument
 * of each U_STRING_DECL is the length of the literal in characters, and each
 * constant declared here is filled in by a matching U_STRING_INIT call in
 * initParser().
 */
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);
-U_STRING_DECL(k_type_reserved, "reserved", 8);
/* Various non-standard processing plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
/*
 * Kinds of resource the parser distinguishes.
 * The numeric values matter: the type-name lookup walks the values strictly
 * between the first (unknown) and last (reserved) enumerators and uses each
 * one as an index into gResourceTypes[], so the order here has to stay in
 * step with that table.
 * The PROCESS_* entries correspond to the non-standard "process(...)" plugin
 * type keywords.
 */
typedef enum EResourceType
{
- RT_UNKNOWN,
- RT_STRING,
- RT_BINARY,
- RT_TABLE,
- RT_TABLE_NO_FALLBACK,
- RT_INTEGER,
- RT_ARRAY,
- RT_ALIAS,
- RT_INTVECTOR,
- RT_IMPORT,
- RT_INCLUDE,
- RT_PROCESS_UCA_RULES,
- RT_PROCESS_COLLATION,
- RT_PROCESS_TRANSLITERATOR,
- RT_PROCESS_DEPENDENCY,
- RT_RESERVED
+ RESTYPE_UNKNOWN,
+ RESTYPE_STRING,
+ RESTYPE_BINARY,
+ RESTYPE_TABLE,
+ RESTYPE_TABLE_NO_FALLBACK,
+ RESTYPE_INTEGER,
+ RESTYPE_ARRAY,
+ RESTYPE_ALIAS,
+ RESTYPE_INTVECTOR,
+ RESTYPE_IMPORT,
+ RESTYPE_INCLUDE,
+ RESTYPE_PROCESS_UCA_RULES,
+ RESTYPE_PROCESS_COLLATION,
+ RESTYPE_PROCESS_TRANSLITERATOR,
+ RESTYPE_PROCESS_DEPENDENCY,
+ RESTYPE_RESERVED
} EResourceType;
static struct {
{"reserved", NULL, NULL}
};
/*
 * Fills in the k_type_* UChar constants (the resource type keywords such as
 * "string", "binary" and "alias") by running U_STRING_INIT on each of them.
 * NOTE(review): presumably this has to run before any of those constants is
 * compared against a token (the type lookup does so with u_strcmp) -- confirm
 * against the U_STRING_DECL / U_STRING_INIT documentation.
 * The omitCollationRules flag is no longer set here; parse() receives it and
 * stores it in its parse state (state.omitCollationRules).
 */
-void initParser(UBool omitCollationRules)
+void initParser()
{
U_STRING_INIT(k_type_string, "string", 6);
U_STRING_INIT(k_type_binary, "binary", 6);
U_STRING_INIT(k_type_alias, "alias", 5);
U_STRING_INIT(k_type_intvector, "intvector", 9);
U_STRING_INIT(k_type_import, "import", 6);
- U_STRING_INIT(k_type_reserved, "reserved", 8);
U_STRING_INIT(k_type_include, "include", 7);
U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
-
- gOmitCollationRules = omitCollationRules;
}
/* Returns TRUE for either table flavor: a regular table or a table(nofallback). */
static inline UBool isTable(enum EResourceType type) {
- return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
+ return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
}
static enum EResourceType
{
struct UString *tokenValue;
struct UString comment;
- enum EResourceType result = RT_UNKNOWN;
+ enum EResourceType result = RESTYPE_UNKNOWN;
uint32_t line=0;
ustr_init(&comment);
expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
if (U_FAILURE(*status))
{
- return RT_UNKNOWN;
+ return RESTYPE_UNKNOWN;
}
*status = U_ZERO_ERROR;
/* Search for normal types */
- result=RT_UNKNOWN;
- while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
+ result=RESTYPE_UNKNOWN;
+ while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
break;
}
}
/* Now search for the aliases */
if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
- result = RT_INTEGER;
+ result = RESTYPE_INTEGER;
}
else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
- result = RT_BINARY;
+ result = RESTYPE_BINARY;
}
- else if (result == RT_RESERVED) {
+ else if (result == RESTYPE_RESERVED) {
char tokenBuffer[1024];
u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
{
enum ETokenType token;
- enum EResourceType resType = RT_UNKNOWN;
+ enum EResourceType resType = RESTYPE_UNKNOWN;
ParseResourceFunction *parseFunction = NULL;
struct UString *tokenValue;
uint32_t startline;
}
- if (resType == RT_UNKNOWN)
+ if (resType == RESTYPE_UNKNOWN)
{
/* No explicit type, so try to work it out. At this point, we've read the first '{'.
We could have any of the following:
if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
{
- resType = RT_ARRAY;
+ resType = RESTYPE_ARRAY;
}
else if (token == TOK_STRING)
{
switch (token)
{
- case TOK_COMMA: resType = RT_ARRAY; break;
- case TOK_OPEN_BRACE: resType = RT_TABLE; break;
- case TOK_CLOSE_BRACE: resType = RT_STRING; break;
- case TOK_COLON: resType = RT_TABLE; break;
+ case TOK_COMMA: resType = RESTYPE_ARRAY; break;
+ case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
+ case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
+ case TOK_COLON: resType = RESTYPE_TABLE; break;
default:
*status = U_INVALID_FORMAT_ERROR;
error(line, "Unexpected token after string, expected ',', '{' or '}'");
}
/* printf("Type guessed as %s\n", resourceNames[resType]); */
- } else if(resType == RT_TABLE_NO_FALLBACK) {
+ } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
*status = U_INVALID_FORMAT_ERROR;
error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
return NULL;
/* parse the top-level resource */
struct SRBRoot *
-parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation,
- UErrorCode *status)
+parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
+ UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
{
struct UString *tokenValue;
struct UString comment;
state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
state.outputdir = outputDir;
state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
+ state.filename = filename;
state.makeBinaryCollation = makeBinaryCollation;
+ state.omitCollationRules = omitCollationRules;
ustr_init(&comment);
expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
- state.bundle = bundle_open(&comment, FALSE, status);
+ state.bundle = new SRBRoot(&comment, FALSE, *status);
if (state.bundle == NULL || U_FAILURE(*status))
{
}
- bundle_setlocale(state.bundle, tokenValue->fChars, status);
+ state.bundle->setLocale(tokenValue->fChars, *status);
/* The following code is to make Empty bundle work no matter with :table specifer or not */
token = getToken(&state, NULL, NULL, &line, status);
if(token==TOK_OPEN_BRACE)
{
*status=U_ZERO_ERROR;
- bundleType=RT_TABLE;
+ bundleType=RESTYPE_TABLE;
}
else
{
/* neither colon nor open brace */
*status=U_PARSE_ERROR;
- bundleType=RT_UNKNOWN;
+ bundleType=RESTYPE_UNKNOWN;
error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
}
}
if (U_FAILURE(*status))
{
- bundle_close(state.bundle, status);
+ delete state.bundle;
return NULL;
}
- if(bundleType==RT_TABLE_NO_FALLBACK) {
+ if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
/*
* Parse a top-level table with the table(nofallback) declaration.
* This is the same as a regular table, but also sets the
* URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
*/
- state.bundle->noFallback=TRUE;
+ state.bundle->fNoFallback=TRUE;
}
/* top-level tables need not handle special table names like "collations" */
- realParseTable(&state, state.bundle->fRoot, NULL, line, status);
+ assert(!state.bundle->fIsPoolBundle);
+ assert(state.bundle->fRoot->fType == URES_TABLE);
+ TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
+ realParseTable(&state, rootTable, NULL, line, status);
if(dependencyArray!=NULL){
- table_add(state.bundle->fRoot, dependencyArray, 0, status);
+ rootTable->add(dependencyArray, 0, *status);
dependencyArray = NULL;
}
if (U_FAILURE(*status))
{
- bundle_close(state.bundle, status);
+ delete state.bundle;
res_close(dependencyArray);
return NULL;
}