/*
*******************************************************************************
*
-* Copyright (C) 1998-2004, International Business Machines
+* Copyright (C) 1998-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "read.h"
#include "ustr.h"
#include "reslist.h"
+#include "rbt_pars.h"
+#include "genrb.h"
#include "unicode/ustring.h"
+#include "unicode/uscript.h"
#include "unicode/putil.h"
+#include <stdio.h>
+
+extern UBool gIncludeUnihanColl;
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD 3
-#define U_ICU_UNIDATA "unidata"
#define CR 0x000D
#define LF 0x000A
#define SPACE 0x0020
#define ESCAPE 0x005C
#define HASH 0x0023
#define QUOTE 0x0027
+#define ZERO 0x0030
#define STARTCOMMAND 0x005B
#define ENDCOMMAND 0x005D
-
-U_STRING_DECL(k_type_string, "string", 6);
-U_STRING_DECL(k_type_binary, "binary", 6);
-U_STRING_DECL(k_type_bin, "bin", 3);
-U_STRING_DECL(k_type_table, "table", 5);
-U_STRING_DECL(k_type_int, "int", 3);
-U_STRING_DECL(k_type_integer, "integer", 7);
-U_STRING_DECL(k_type_array, "array", 5);
-U_STRING_DECL(k_type_alias, "alias", 5);
-U_STRING_DECL(k_type_intvector, "intvector", 9);
-U_STRING_DECL(k_type_import, "import", 6);
-U_STRING_DECL(k_type_include, "include", 7);
-U_STRING_DECL(k_type_reserved, "reserved", 8);
-
-enum EResourceType
-{
- RT_UNKNOWN,
- RT_STRING,
- RT_BINARY,
- RT_TABLE,
- RT_INTEGER,
- RT_ARRAY,
- RT_ALIAS,
- RT_INTVECTOR,
- RT_IMPORT,
- RT_INCLUDE,
- RT_RESERVED
-};
-
-/* only used for debugging */
-const char *resourceNames[] =
-{
- "Unknown",
- "String",
- "Binary",
- "Table",
- "Integer",
- "Array",
- "Alias",
- "Int vector",
- "Import",
- "Include",
- "Reserved",
-};
+#define OPENSQBRACKET 0x005B
+#define CLOSESQBRACKET 0x005D
struct Lookahead
{
/* Just to store "TRUE" */
static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
-static struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
-static uint32_t lookaheadPosition;
-static UCHARBUF *buffer;
-
-static struct SRBRoot *bundle;
-static const char *inputdir;
-static uint32_t inputdirLength;
+/*
+ * State of one parse run. These fields take the place of the file-scope
+ * statics with the same names and are handed to the parser functions
+ * through a ParseState pointer.
+ */
+typedef struct {
+ /* MAX_LOOKAHEAD + 1 token slots, indexed modulo MAX_LOOKAHEAD + 1 as a circular buffer */
+ struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
+ /* index of the slot holding the token that getToken() returns next */
+ uint32_t lookaheadPosition;
+ /* input buffer that getNextToken() reads tokens from */
+ UCHARBUF *buffer;
+ /* bundle passed to string_open()/table_open()/etc. when resources are created */
+ struct SRBRoot *bundle;
+ /* directory prefixed to the names of files referenced from the input; may be NULL */
+ const char *inputdir;
+ /* used to index the last character of inputdir - presumably its string length; TODO confirm where it is set */
+ uint32_t inputdirLength;
+ /* directory prefixed to dependency file names when checking that they exist; may be NULL */
+ const char *outputdir;
+ /* used to index the last character of outputdir - presumably its string length; TODO confirm where it is set */
+ uint32_t outputdirLength;
+} ParseState;
static UBool gMakeBinaryCollation = TRUE;
+static UBool gOmitCollationRules = FALSE;
-static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
+typedef struct SResource *
+ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
-void initParser(UBool makeBinaryCollation)
-{
- uint32_t i;
-
- U_STRING_INIT(k_type_string, "string", 6);
- U_STRING_INIT(k_type_binary, "binary", 6);
- U_STRING_INIT(k_type_bin, "bin", 3);
- U_STRING_INIT(k_type_table, "table", 5);
- U_STRING_INIT(k_type_int, "int", 3);
- U_STRING_INIT(k_type_integer, "integer", 7);
- U_STRING_INIT(k_type_array, "array", 5);
- U_STRING_INIT(k_type_alias, "alias", 5);
- U_STRING_INIT(k_type_intvector, "intvector", 9);
- U_STRING_INIT(k_type_import, "import", 6);
- U_STRING_INIT(k_type_reserved, "reserved", 8);
- U_STRING_INIT(k_type_include, "include", 7);
- for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
- {
- ustr_init(&lookahead[i].value);
- }
- gMakeBinaryCollation = makeBinaryCollation;
-}
+static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
/* The nature of the lookahead buffer:
There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
getToken(NULL, NULL, status); bad - value is now a different string
*/
static void
-initLookahead(UCHARBUF *buf, UErrorCode *status)
+initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
{
static uint32_t initTypeStrings = 0;
uint32_t i;
initTypeStrings = 1;
}
- lookaheadPosition = 0;
- buffer = buf;
+ state->lookaheadPosition = 0;
+ state->buffer = buf;
resetLineNumber();
for (i = 0; i < MAX_LOOKAHEAD; i++)
{
- lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
+ state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
if (U_FAILURE(*status))
{
return;
*status = U_ZERO_ERROR;
}
+/*
+ * Releases the value and comment strings of every lookahead slot.
+ *
+ * The lookahead array has MAX_LOOKAHEAD + 1 entries and getToken() refills
+ * slot (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1), so the
+ * last slot owns string storage as well. The loop therefore runs through
+ * index MAX_LOOKAHEAD inclusive; stopping one short leaks that slot.
+ *
+ * NOTE(review): assumes all MAX_LOOKAHEAD + 1 slots were initialised before
+ * this is called (getToken() already requires that) - confirm at the call site.
+ */
+static void
+cleanupLookahead(ParseState* state)
+{
+    uint32_t i;
+    for (i = 0; i <= MAX_LOOKAHEAD; i++)
+    {
+        ustr_deinit(&state->lookahead[i].value);
+        ustr_deinit(&state->lookahead[i].comment);
+    }
+}
+
static enum ETokenType
-getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
+getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
{
enum ETokenType result;
uint32_t i;
- result = lookahead[lookaheadPosition].type;
+ result = state->lookahead[state->lookaheadPosition].type;
if (tokenValue != NULL)
{
- *tokenValue = &lookahead[lookaheadPosition].value;
+ *tokenValue = &state->lookahead[state->lookaheadPosition].value;
}
if (linenumber != NULL)
{
- *linenumber = lookahead[lookaheadPosition].line;
+ *linenumber = state->lookahead[state->lookaheadPosition].line;
}
if (comment != NULL)
{
- ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
+ ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
}
- i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
- lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
- ustr_setlen(&lookahead[i].comment, 0, status);
- ustr_setlen(&lookahead[i].value, 0, status);
- lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
+ i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
+ state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
+ ustr_setlen(&state->lookahead[i].comment, 0, status);
+ ustr_setlen(&state->lookahead[i].value, 0, status);
+ state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
/* printf("getToken, returning %s\n", tokenNames[result]); */
}
static enum ETokenType
-peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
+peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
{
- uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
+ uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
if (U_FAILURE(*status))
{
if (tokenValue != NULL)
{
- *tokenValue = &lookahead[i].value;
+ *tokenValue = &state->lookahead[i].value;
}
if (linenumber != NULL)
{
- *linenumber = lookahead[i].line;
+ *linenumber = state->lookahead[i].line;
}
if(comment != NULL){
- ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
+ ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
}
- return lookahead[i].type;
+ return state->lookahead[i].type;
}
static void
-expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
+expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
{
uint32_t line;
- enum ETokenType token = getToken(tokenValue, comment, &line, status);
+ enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
- if (U_FAILURE(*status))
+ if (linenumber != NULL)
{
- return;
+ *linenumber = line;
}
- if (linenumber != NULL)
+ if (U_FAILURE(*status))
{
- *linenumber = line;
+ return;
}
if (token != expectedToken)
*status = U_INVALID_FORMAT_ERROR;
error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
}
- else /* "else" is added by Jing/GCL */
+ else
{
*status = U_ZERO_ERROR;
}
}
-static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status)
+static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
{
struct UString *tokenValue;
char *result;
uint32_t count;
- expect(TOK_STRING, &tokenValue, comment, line, status);
+ expect(state, TOK_STRING, &tokenValue, comment, line, status);
if (U_FAILURE(*status))
{
return result;
}
-static enum EResourceType
-parseResourceType(UErrorCode *status)
-{
- struct UString *tokenValue;
- struct UString comment;
- enum EResourceType result = RT_UNKNOWN;
- uint32_t line=0;
- ustr_init(&comment);
- expect(TOK_STRING, &tokenValue, &comment, &line, status);
-
- if (U_FAILURE(*status))
- {
- return RT_UNKNOWN;
- }
-
- *status = U_ZERO_ERROR;
-
- if (u_strcmp(tokenValue->fChars, k_type_string) == 0) {
- result = RT_STRING;
- } else if (u_strcmp(tokenValue->fChars, k_type_array) == 0) {
- result = RT_ARRAY;
- } else if (u_strcmp(tokenValue->fChars, k_type_alias) == 0) {
- result = RT_ALIAS;
- } else if (u_strcmp(tokenValue->fChars, k_type_table) == 0) {
- result = RT_TABLE;
- } else if (u_strcmp(tokenValue->fChars, k_type_binary) == 0) {
- result = RT_BINARY;
- } else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
- result = RT_BINARY;
- } else if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
- result = RT_INTEGER;
- } else if (u_strcmp(tokenValue->fChars, k_type_integer) == 0) {
- result = RT_INTEGER;
- } else if (u_strcmp(tokenValue->fChars, k_type_intvector) == 0) {
- result = RT_INTVECTOR;
- } else if (u_strcmp(tokenValue->fChars, k_type_import) == 0) {
- result = RT_IMPORT;
- } else if (u_strcmp(tokenValue->fChars, k_type_include) == 0) {
- result = RT_INCLUDE;
- } else if (u_strcmp(tokenValue->fChars, k_type_reserved) == 0) {
- result = RT_RESERVED;
- } else {
- char tokenBuffer[1024];
- u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
- tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
- *status = U_INVALID_FORMAT_ERROR;
- error(line, "unknown resource type '%s'", tokenBuffer);
- }
-
- return result;
-}
-
static struct SResource *
-parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
+parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
struct SResource *result = NULL;
struct UString *tokenValue;
- struct UString comment;
FileStream *file = NULL;
char filename[256] = { '\0' };
char cs[128] = { '\0' };
UChar *targetLimit = NULL;
int32_t size = 0;
- ustr_init(&comment);
- expect(TOK_STRING, &tokenValue, &comment, &line, status);
+ expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
if(isVerbose()){
printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
return NULL;
}
/* make the filename including the directory */
- if (inputdir != NULL)
+ if (state->inputdir != NULL)
{
- uprv_strcat(filename, inputdir);
+ uprv_strcat(filename, state->inputdir);
- if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
+ if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
{
uprv_strcat(filename, U_FILE_SEP_STRING);
}
u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
- expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+ expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
if (U_FAILURE(*status))
{
return NULL;
}
- uprv_strcat(filename,"..");
- uprv_strcat(filename,U_FILE_SEP_STRING);
- uprv_strcat(filename, U_ICU_UNIDATA);
- uprv_strcat(filename, U_FILE_SEP_STRING);
uprv_strcat(filename, cs);
+ if(gOmitCollationRules) {
+ return res_none();
+ }
ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
* since the actual size needed for storing UChars
* is not known in UTF-8 byte stream
*/
- size = ucbuf_size(ucbuf);
+ size = ucbuf_size(ucbuf) + 1;
pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
target = pTarget;
{
c = ucbuf_getc(ucbuf, status);
if(c == QUOTE) {
- quoted = (UBool)!quoted;
+ quoted = (UBool)!quoted;
}
/* weiv (06/26/2002): adding the following:
* - preserving spaces in commands [...]
*/
if (c == STARTCOMMAND && !quoted)
{
- /* preserve commands
- * closing bracket will be handled by the
- * append at the end of the loop
- */
- while(c != ENDCOMMAND) {
- U_APPEND_CHAR32(c, target,len);
- c = ucbuf_getc(ucbuf, status);
- }
- } else if (c == HASH && !quoted) {
- /* skip comments */
- while(c != CR && c != LF) {
- c = ucbuf_getc(ucbuf, status);
- }
- continue;
- } else if (c == ESCAPE)
+ /* preserve commands
+ * closing bracket will be handled by the
+ * append at the end of the loop
+ */
+ while(c != ENDCOMMAND) {
+ U_APPEND_CHAR32(c, target,len);
+ c = ucbuf_getc(ucbuf, status);
+ }
+ }
+ else if (c == HASH && !quoted) {
+ /* skip comments */
+ while(c != CR && c != LF) {
+ c = ucbuf_getc(ucbuf, status);
+ }
+ continue;
+ }
+ else if (c == ESCAPE)
{
c = unescape(ucbuf, status);
}
else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
{
- /* ignore spaces carriage returns
- * and line feed unless in the form \uXXXX
+ /* ignore spaces carriage returns
+ * and line feed unless in the form \uXXXX
*/
continue;
}
*target = 0x0000;
}
- result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
+ result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
ucbuf_close(ucbuf);
}
+/*
+ * Parses a resource whose body is one string token naming a rules file,
+ * followed by the closing brace. The file is looked up relative to
+ * state->inputdir, read completely, reduced with utrans_stripRules() and
+ * stored as a string resource under `tag`.
+ *
+ * state     parse state (token stream, bundle, input directory)
+ * tag       key of the resource to create; may be NULL
+ * startline line number used in the verbose trace
+ * comment   not used by this function
+ * status    in/out error code
+ *
+ * Returns the new string resource, or NULL with *status set on failure.
+ * Directory and file names that do not fit the fixed-size local buffers are
+ * rejected with U_BUFFER_OVERFLOW_ERROR instead of being written past the
+ * end of those buffers.
+ */
static struct SResource *
-parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
+    struct SResource *result = NULL;
struct UString *tokenValue;
+    char filename[256] = { '\0' };
+    char cs[128] = { '\0' };
+    uint32_t line;
+    UCHARBUF *ucbuf=NULL;
+    const char* cp = NULL;
+    UChar *pTarget = NULL;
+    const UChar *pSource = NULL;
+    int32_t size = 0;
+
+    expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
+
+    if(isVerbose()){
+        printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
+    }
+
+    if (U_FAILURE(*status))
+    {
+        return NULL;
+    }
+    /* make the filename including the directory */
+    if (state->inputdir != NULL)
+    {
+        /* directory + optional separator + terminator must fit */
+        if (uprv_strlen(state->inputdir) + 2 > sizeof(filename))
+        {
+            error(line, "The input directory name is too long for the file name buffer");
+            *status = U_BUFFER_OVERFLOW_ERROR;
+            return NULL;
+        }
+        uprv_strcat(filename, state->inputdir);
+
+        /* the length guard keeps the index below from wrapping for an empty directory */
+        if (state->inputdirLength > 0 && state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
+        {
+            uprv_strcat(filename, U_FILE_SEP_STRING);
+        }
+    }
+
+    /* tokenValue does not survive the next expect(), so size-check and copy it now */
+    if (tokenValue->fLength >= (int32_t)sizeof(cs)
+        || uprv_strlen(filename) + (size_t)tokenValue->fLength >= sizeof(filename))
+    {
+        error(line, "The file name is too long for the file name buffer");
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return NULL;
+    }
+    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
+
+    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+
+    if (U_FAILURE(*status))
+    {
+        return NULL;
+    }
+    uprv_strcat(filename, cs);
+
+    ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
+
+    if (U_FAILURE(*status)) {
+        error(line, "An error occured while opening the input file %s\n", filename);
+        return NULL;
+    }
+
+    pSource = ucbuf_getBuffer(ucbuf, &size, status);
+    if (U_FAILURE(*status)) {
+        ucbuf_close(ucbuf);
+        return NULL;
+    }
+
+    /* one extra UChar so the stripped rules can always be NUL-terminated */
+    pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
+    if (pTarget == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        ucbuf_close(ucbuf);
+        return NULL;
+    }
+    uprv_memset(pTarget, 0, (size + 1)*U_SIZEOF_UCHAR);
+
+#if !UCONFIG_NO_TRANSLITERATION
+    size = utrans_stripRules(pSource, size, pTarget, status);
+#else
+    size = 0;
+    fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
+#endif
+    result = string_open(state->bundle, tag, pTarget, size, NULL, status);
+
+    ucbuf_close(ucbuf);
+    uprv_free(pTarget);
+
+    return result;
+}
+/* Array resource keyed "%%DEPENDENCY" that collects every dependency string; created on first use by parseDependency(). */
+static struct SResource* dependencyArray = NULL;
+
+/*
+ * Parses a dependency entry: one string token naming a file, followed by the
+ * closing brace. The name is checked for existence relative to
+ * state->outputdir (an error in strict mode, otherwise a warning), appended
+ * to the shared dependencyArray, and - when `tag` is not NULL - also
+ * returned as a string resource under that tag.
+ *
+ * state     parse state (token stream, bundle, output directory)
+ * tag       key for the returned string resource; NULL yields a NULL result
+ * startline line number used in the verbose trace
+ * comment   comment attached to the created string resources
+ * status    in/out error code
+ *
+ * Returns the string resource (NULL when tag is NULL), or NULL with *status
+ * set on failure; a resource already created for `tag` is closed on every
+ * failure path so it does not leak. Names that do not fit the fixed-size
+ * local buffers are rejected with U_BUFFER_OVERFLOW_ERROR.
+ */
+static struct SResource *
+parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+{
struct SResource *result = NULL;
+    struct SResource *elem = NULL;
+    struct UString *tokenValue;
+    uint32_t line;
+    char filename[256] = { '\0' };
+    char cs[128] = { '\0' };
+
+    expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
+
-if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
+    if(isVerbose()){
+        printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
+    }
+
+    if (U_FAILURE(*status))
{
- return parseUCARules(tag, startline, status);
+        return NULL;
}
+    /* make the filename including the directory */
+    if (state->outputdir != NULL)
+    {
+        /* directory + optional separator + terminator must fit */
+        if (uprv_strlen(state->outputdir) + 2 > sizeof(filename))
+        {
+            error(line, "The output directory name is too long for the dependency file name buffer");
+            *status = U_BUFFER_OVERFLOW_ERROR;
+            return NULL;
+        }
+        uprv_strcat(filename, state->outputdir);
+
+        /* the length guard keeps the index below from wrapping for an empty directory */
+        if (state->outputdirLength > 0 && state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
+        {
+            uprv_strcat(filename, U_FILE_SEP_STRING);
+        }
+    }
+
+    if (tokenValue->fLength >= (int32_t)sizeof(cs)
+        || uprv_strlen(filename) + (size_t)tokenValue->fLength >= sizeof(filename))
+    {
+        error(line, "The dependency file name is too long for the file name buffer");
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return NULL;
+    }
+    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
+    uprv_strcat(filename, cs);
+
+    if(!T_FileStream_file_exists(filename)){
+        if(isStrict()){
+            error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
+        }else{
+            warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
+        }
+    }
+    if(dependencyArray==NULL){
+        dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
+    }
+    /* tokenValue is still valid here: the closing brace is consumed only at the end */
+    if(tag!=NULL){
+        result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
+    }
+    elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
+
+    array_add(dependencyArray, elem, status);
+
+    if (U_FAILURE(*status))
+    {
+        if (result != NULL)
+        {
+            res_close(result);
+        }
+        return NULL;
+    }
+    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+    if (U_FAILURE(*status))
+    {
+        /* same handling as parseString: do not hand back a resource with a failed status */
+        if (result != NULL)
+        {
+            res_close(result);
+        }
+        return NULL;
+    }
+    return result;
+}
+static struct SResource *
+parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+{
+ struct UString *tokenValue;
+ struct SResource *result = NULL;
+
+/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
+ {
+ return parseUCARules(tag, startline, status);
+ }*/
if(isVerbose()){
printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- expect(TOK_STRING, &tokenValue, NULL, NULL, status);
+ expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
if (U_SUCCESS(*status))
{
/* create the string now - tokenValue doesn't survive a call to getToken (and therefore
doesn't survive expect either) */
- result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
+ result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
if(U_SUCCESS(*status) && result) {
- expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+ expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
- if (U_FAILURE(*status))
- {
- string_close(result, status);
- return NULL;
- }
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return NULL;
+ }
}
}
}
static struct SResource *
-parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
struct UString *tokenValue;
struct SResource *result = NULL;
- expect(TOK_STRING, &tokenValue, NULL, NULL, status);
+ expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
if(isVerbose()){
printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
/* create the string now - tokenValue doesn't survive a call to getToken (and therefore
doesn't survive expect either) */
- result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
+ result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
- expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+ expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
if (U_FAILURE(*status))
{
- alias_close(result, status);
+ res_close(result);
return NULL;
}
}
return result;
}
+/*
+ * Context handed to importFromDataFile() through its void* context
+ * argument. addCollation() fills it from the current ParseState before
+ * calling ucol_openRulesForImport().
+ */
+typedef struct{
+ /* directory in which the imported <locale>.txt file is looked up; also forwarded to parse() */
+ const char* inputDir;
+ /* forwarded unchanged to parse() */
+ const char* outputDir;
+} GenrbData;
+
+/*
+ * Returns the direct child of `res` whose key equals `key`, or NULL when
+ * there is none.
+ *
+ * `res` is read through its u.fTable member; its type is not checked here,
+ * so the caller must pass a table resource. A NULL `res`, a NULL `key` and
+ * the res_none() placeholder all yield NULL, which lets callers chain
+ * lookups and test only the final result.
+ */
+static struct SResource* resLookup(struct SResource* res, const char* key){
+    struct SResource *current = NULL;
+    struct SResTable *list;
+    if (res == NULL || key == NULL || res == res_none()) {
+        return NULL;
+    }
+
+    list = &(res->u.fTable);
+
+    for (current = list->fFirst; current != NULL; current = current->fNext) {
+        /* fKey is an offset into the key pool of the root bundle */
+        if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
+            return current;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Import callback passed to ucol_openRulesForImport(): loads the collation
+ * rules of another locale from its source file.
+ *
+ * The file name is `locale` with every '-' replaced by '_' and ".txt"
+ * appended, opened relative to the inputDir of the GenrbData context. The
+ * file is parsed with parse() and the string stored at
+ * collations/<type>/Sequence is returned.
+ *
+ * context  pointer to a GenrbData
+ * locale   locale identifier of the file to import
+ * type     collation type to look up below "collations"
+ * pLength  receives the length of the returned rules
+ * status   in/out error code
+ *
+ * Returns a pointer to the rule text, or NULL on failure. *status is set to
+ * U_ILLEGAL_ARGUMENT_ERROR for NULL arguments, U_MEMORY_ALLOCATION_ERROR when
+ * a buffer cannot be allocated and U_MISSING_RESOURCE_ERROR when the
+ * requested rules are not present in the parsed file.
+ *
+ * The returned pointer refers to storage inside the parsed bundle, so the
+ * bundle is deliberately not closed here.
+ */
+static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
+    struct SRBRoot *data = NULL;
+    UCHARBUF *ucbuf = NULL;
+    GenrbData* genrbdata = (GenrbData*) context;
+    int32_t localeLength = 0;
+    char* filename = NULL;
+    char *inputDirBuf = NULL;
+    char *openFileName = NULL;
+    const char* cp = "";
+    UChar* urules = NULL;
+    int32_t i = 0;
+    int32_t dirlen = 0;
+    int32_t filelen = 0;
+    struct SResource* root = NULL;
+    struct SResource* collations = NULL;
+    struct SResource* collation = NULL;
+    struct SResource* sequence = NULL;
+
+    /* validate before allocating anything, so the early returns cannot leak */
+    if (status==NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (genrbdata == NULL || locale == NULL || type == NULL || pLength == NULL) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* build "<locale with '-' replaced by '_'>.txt" */
+    localeLength = (int32_t)uprv_strlen(locale);
+    filename = (char*)uprv_malloc(localeLength+5);
+    if(filename==NULL){
+        *status=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    memcpy(filename, locale, localeLength);
+    for(i = 0; i < localeLength; i++){
+        if(filename[i] == '-'){
+            filename[i] = '_';
+        }
+    }
+    uprv_strcpy(filename + localeLength, ".txt");
+    filelen = localeLength + 4;
+
+    if(genrbdata->inputDir == NULL) {
+        const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
+        openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
+
+        /* test for NULL */
+        if(openFileName == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto finish;
+        }
+
+        openFileName[0] = '\0';
+        if (filenameBegin != NULL) {
+            /*
+             * When a filename ../../../data/root.txt is specified,
+             * we presume that the input directory is ../../../data
+             * This is very important when the resource file includes
+             * another file, like UCARules.txt or thaidict.brk.
+             */
+            int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
+            inputDirBuf = (char *)uprv_malloc(filenameSize);
+
+            /* test for NULL before copying into the buffer */
+            if(inputDirBuf == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto finish;
+            }
+
+            uprv_strncpy(inputDirBuf, filename, filenameSize);
+            inputDirBuf[filenameSize - 1] = 0;
+            /* borrowed by the context only for the duration of this call; reset at finish */
+            genrbdata->inputDir = inputDirBuf;
+        }
+    }else{
+        dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
+
+        if(dirlen > 0 && genrbdata->inputDir[dirlen-1] == U_FILE_SEP_CHAR) {
+            /* the directory already ends with a separator: plain concatenation */
+            openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
+
+            /* test for NULL */
+            if(openFileName == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto finish;
+            }
+
+            uprv_strcpy(openFileName, genrbdata->inputDir);
+        } else {
+            openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
+
+            /* test for NULL */
+            if(openFileName == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto finish;
+            }
+
+            openFileName[0] = '\0';
+            /*
+             * append the input dir to openFileName if the first char in
+             * filename is not file separation char and the last char input directory is not '.'.
+             * This is to support :
+             * genrb -s. /home/icu/data
+             * genrb -s. icu/data
+             * The user cannot mix notations like
+             * genrb -s. /icu/data --- the absolute path specified. -s redundant
+             * user should use
+             * genrb -s. icu/data --- start from CWD and look in icu/data dir
+             */
+            if( dirlen > 0 && (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
+                uprv_strcpy(openFileName, genrbdata->inputDir);
+                openFileName[dirlen] = U_FILE_SEP_CHAR;
+                openFileName[dirlen + 1] = '\0';
+            }
+        }
+    }
+    uprv_strcat(openFileName, filename);
+    /* printf("%s\n", openFileName); */
+    ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
+
+    if(*status == U_FILE_ACCESS_ERROR) {
+
+        fprintf(stderr, "couldn't open file %s\n", openFileName);
+        goto finish;
+    }
+    if (ucbuf == NULL || U_FAILURE(*status)) {
+        fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName,u_errorName(*status));
+        goto finish;
+    }
+
+    /* Parse the data into an SRBRoot */
+    data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, status);
+    if (U_FAILURE(*status)) {
+        goto finish;
+    }
+
+    /* walk collations/<type>/Sequence, stopping at the first missing level */
+    if (data != NULL) {
+        root = data->fRoot;
+    }
+    if (root != NULL) {
+        collations = resLookup(root, "collations");
+    }
+    if (collations != NULL) {
+        collation = resLookup(collations, type);
+    }
+    if (collation != NULL) {
+        sequence = resLookup(collation, "Sequence");
+    }
+    if (sequence == NULL) {
+        fprintf(stderr, "couldn't find collations/%s/Sequence in file %s\n", type, openFileName);
+        *status = U_MISSING_RESOURCE_ERROR;
+        goto finish;
+    }
+    urules = sequence->u.fString.fChars;
+    *pLength = sequence->u.fString.fLength;
+
+finish:
+
+    if (inputDirBuf != NULL) {
+        /* inputDir was NULL on entry; do not leave the context pointing at freed memory */
+        genrbdata->inputDir = NULL;
+        uprv_free(inputDirBuf);
+    }
+
+    if (openFileName != NULL) {
+        uprv_free(openFileName);
+    }
+
+    if (filename != NULL) {
+        uprv_free(filename);
+    }
+
+    if(ucbuf) {
+        ucbuf_close(ucbuf);
+    }
+
+    return urules;
+}
+
static struct SResource *
-addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
+addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
{
struct SResource *member = NULL;
struct UString *tokenValue;
UVersionInfo version;
UBool override = FALSE;
uint32_t line;
+ GenrbData genrbdata;
/* '{' . (name resource)* '}' */
version[0]=0; version[1]=0; version[2]=0; version[3]=0;
for (;;)
{
ustr_init(&comment);
- token = getToken(&tokenValue, &comment, &line, status);
+ token = getToken(state, &tokenValue, &comment, &line, status);
if (token == TOK_CLOSE_BRACE)
{
if (token != TOK_STRING)
{
- table_close(result, status);
+ res_close(result);
*status = U_INVALID_FORMAT_ERROR;
if (token == TOK_EOF)
if (U_FAILURE(*status))
{
- table_close(result, status);
+ res_close(result);
return NULL;
}
- member = parseResource(subtag, NULL, status);
+ member = parseResource(state, subtag, NULL, status);
if (U_FAILURE(*status))
{
- table_close(result, status);
+ res_close(result);
return NULL;
}
}
else if (uprv_strcmp(subtag, "Sequence") == 0)
{
-#if UCONFIG_NO_COLLATION
- warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
+#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
+ warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
#else
- /* first we add the "Sequence", so that we always have rules */
- table_add(result, member, line, status);
if(gMakeBinaryCollation) {
UErrorCode intStatus = U_ZERO_ERROR;
int32_t len = 0;
uint8_t *data = NULL;
UCollator *coll = NULL;
+ int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
+ uint32_t reorderCodeCount;
+ int32_t reorderCodeIndex;
UParseError parseError;
- /* add sequence */
- /*table_add(result, member, line, status);*/
- coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
- UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
+ genrbdata.inputDir = state->inputdir;
+ genrbdata.outputDir = state->outputdir;
+
+ coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
+ UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
if (U_SUCCESS(intStatus) && coll != NULL)
{
len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
data = (uint8_t *)uprv_malloc(len);
+ intStatus = U_ZERO_ERROR;
len = ucol_cloneBinary(coll, data, len, &intStatus);
/*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
if (U_SUCCESS(intStatus) && data != NULL)
{
- member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
- /*table_add(bundle->fRoot, member, line, status);*/
- table_add(result, member, line, status);
+ struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
+ table_add(result, collationBin, line, status);
uprv_free(data);
+
+ reorderCodeCount = ucol_getReorderCodes(
+ coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
+ if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
+ struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
+ for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
+ intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
+ }
+ table_add(result, reorderCodeRes, line, status);
+ }
}
else
{
}
else
{
+ if(intStatus == U_FILE_ACCESS_ERROR) {
+ error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
+ *status = intStatus;
+ return NULL;
+ }
warning(line, "%%Collation could not be constructed from CollationElements - check context!");
if(isStrict()){
*status = intStatus;
}
}
#endif
+ /* in order to achieve smaller data files, we can direct genrb */
+ /* to omit collation rules */
+ if(gOmitCollationRules) {
+ bundle_closeString(state->bundle, member);
+ } else {
+ table_add(result, member, line, status);
+ }
}
/*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
if (U_FAILURE(*status))
{
- table_close(result, status);
+ res_close(result);
return NULL;
}
}
}
static struct SResource *
-parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
+parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
{
struct SResource *result = NULL;
struct SResource *member = NULL;
char subtag[1024], typeKeyword[1024];
uint32_t line;
- result = table_open(bundle, tag, NULL, status);
+ result = table_open(state->bundle, tag, NULL, status);
if (result == NULL || U_FAILURE(*status))
{
printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
if(!newCollation) {
- return addCollation(result, startline, status);
- } else {
- for(;;) {
- ustr_init(&comment);
- token = getToken(&tokenValue, &comment, &line, status);
-
- if (token == TOK_CLOSE_BRACE)
- {
- return result;
- }
-
- if (token != TOK_STRING)
- {
- table_close(result, status);
- *status = U_INVALID_FORMAT_ERROR;
+ return addCollation(state, result, startline, status);
+ }
+ else {
+ for(;;) {
+ ustr_init(&comment);
+ token = getToken(state, &tokenValue, &comment, &line, status);
- if (token == TOK_EOF)
+ if (token == TOK_CLOSE_BRACE)
{
- error(startline, "unterminated table");
+ return result;
}
- else
+
+ if (token != TOK_STRING)
{
- error(line, "Unexpected token %s", tokenNames[token]);
+ res_close(result);
+ *status = U_INVALID_FORMAT_ERROR;
+
+ if (token == TOK_EOF)
+ {
+ error(startline, "unterminated table");
+ }
+ else
+ {
+ error(line, "Unexpected token %s", tokenNames[token]);
+ }
+
+ return NULL;
}
- return NULL;
- }
+ u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
- u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return NULL;
+ }
- if (U_FAILURE(*status))
- {
- table_close(result, status);
- return NULL;
- }
+ if (uprv_strcmp(subtag, "default") == 0)
+ {
+ member = parseResource(state, subtag, NULL, status);
- if (uprv_strcmp(subtag, "default") == 0)
- {
- member = parseResource(subtag, NULL, status);
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return NULL;
+ }
- if (U_FAILURE(*status))
- {
- table_close(result, status);
- return NULL;
- }
+ table_add(result, member, line, status);
+ }
+ else
+ {
+ token = peekToken(state, 0, &tokenValue, &line, &comment, status);
+ /* this probably needs to be refactored or recursively use the parser */
+ /* first we assume that our collation table won't have the explicit type */
+ /* then, we cannot handle aliases */
+ if(token == TOK_OPEN_BRACE) {
+ token = getToken(state, &tokenValue, &comment, &line, status);
+ collationRes = table_open(state->bundle, subtag, NULL, status);
+ collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
+ if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
+ table_add(result, collationRes, startline, status);
+ }
+ } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
+ /* we could have a table too */
+ token = peekToken(state, 1, &tokenValue, &line, &comment, status);
+ u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
+ if(uprv_strcmp(typeKeyword, "alias") == 0) {
+ member = parseResource(state, subtag, NULL, status);
+
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return NULL;
+ }
- table_add(result, member, line, status);
- }
- else
- {
- token = peekToken(0, &tokenValue, &line, &comment, status);
- /* this probably needs to be refactored or recursively use the parser */
- /* first we assume that our collation table won't have the explicit type */
- /* then, we cannot handle aliases */
- if(token == TOK_OPEN_BRACE) {
- token = getToken(&tokenValue, &comment, &line, status);
- collationRes = table_open(bundle, subtag, NULL, status);
- table_add(result, addCollation(collationRes, startline, status), startline, status);
- } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
- /* we could have a table too */
- token = peekToken(1, &tokenValue, &line, &comment, status);
- u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
- if(uprv_strcmp(typeKeyword, "alias") == 0) {
- member = parseResource(subtag, NULL, status);
-
- if (U_FAILURE(*status))
- {
- table_close(result, status);
- return NULL;
- }
-
- table_add(result, member, line, status);
- } else {
- *status = U_INVALID_FORMAT_ERROR;
- return NULL;
+ table_add(result, member, line, status);
+ } else {
+ res_close(result);
+ *status = U_INVALID_FORMAT_ERROR;
+ return NULL;
+ }
+ } else {
+ res_close(result);
+ *status = U_INVALID_FORMAT_ERROR;
+ return NULL;
+ }
}
- } else {
- *status = U_INVALID_FORMAT_ERROR;
- return NULL;
- }
- }
- /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
+ /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
- /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
+ /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
- if (U_FAILURE(*status))
- {
- table_close(result, status);
- return NULL;
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return NULL;
+ }
}
-
- }
}
}
/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
if this weren't special-cased, wouldn't be set until the entire file had been processed. */
static struct SResource *
-realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
+realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
{
struct SResource *member = NULL;
struct UString *tokenValue=NULL;
for (;;)
{
ustr_init(&comment);
- token = getToken(&tokenValue, &comment, &line, status);
+ token = getToken(state, &tokenValue, &comment, &line, status);
if (token == TOK_CLOSE_BRACE)
{
if (token != TOK_STRING)
{
- table_close(table, status);
*status = U_INVALID_FORMAT_ERROR;
if (token == TOK_EOF)
} else {
*status = U_INVALID_FORMAT_ERROR;
error(line, "invariant characters required for table keys");
- table_close(table, status);
return NULL;
}
if (U_FAILURE(*status))
{
- error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
- table_close(table, status);
+ error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
return NULL;
}
- member = parseResource(subtag, &comment, status);
+ member = parseResource(state, subtag, &comment, status);
if (member == NULL || U_FAILURE(*status))
{
- error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
- table_close(table, status);
+ error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
return NULL;
}
if (U_FAILURE(*status))
{
- error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
- table_close(table, status);
+ error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
return NULL;
}
readToken = TRUE;
+ ustr_deinit(&comment);
}
/* not reached */
}
static struct SResource *
-parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
struct SResource *result;
if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
{
- return parseCollationElements(tag, startline, FALSE, status);
+ return parseCollationElements(state, tag, startline, FALSE, status);
}
if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
{
- return parseCollationElements(tag, startline, TRUE, status);
+ return parseCollationElements(state, tag, startline, TRUE, status);
}
if(isVerbose()){
printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- result = table_open(bundle, tag, comment, status);
+ result = table_open(state->bundle, tag, comment, status);
if (result == NULL || U_FAILURE(*status))
{
return NULL;
}
- return realParseTable(result, tag, startline, status);
+ return realParseTable(state, result, tag, startline, status);
}
static struct SResource *
-parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
struct SResource *result = NULL;
struct SResource *member = NULL;
enum ETokenType token;
UBool readToken = FALSE;
- result = array_open(bundle, tag, comment, status);
+ result = array_open(state->bundle, tag, comment, status);
if (result == NULL || U_FAILURE(*status))
{
ustr_setlen(&memberComments, 0, status);
/* check for end of array, but don't consume next token unless it really is the end */
- token = peekToken(0, &tokenValue, NULL, &memberComments, status);
+ token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
if (token == TOK_CLOSE_BRACE)
{
- getToken(NULL, NULL, NULL, status);
+ getToken(state, NULL, NULL, NULL, status);
if (!readToken) {
warning(startline, "Encountered empty array");
}
if (token == TOK_EOF)
{
- array_close(result, status);
+ res_close(result);
*status = U_INVALID_FORMAT_ERROR;
error(startline, "unterminated array");
return NULL;
/* string arrays are a special case */
if (token == TOK_STRING)
{
- getToken(&tokenValue, &memberComments, NULL, status);
- member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
+ getToken(state, &tokenValue, &memberComments, NULL, status);
+ member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
}
else
{
- member = parseResource(NULL, &memberComments, status);
+ member = parseResource(state, NULL, &memberComments, status);
}
if (member == NULL || U_FAILURE(*status))
{
- array_close(result, status);
+ res_close(result);
return NULL;
}
if (U_FAILURE(*status))
{
- array_close(result, status);
+ res_close(result);
return NULL;
}
/* eat optional comma if present */
- token = peekToken(0, NULL, NULL, NULL, status);
+ token = peekToken(state, 0, NULL, NULL, NULL, status);
if (token == TOK_COMMA)
{
- getToken(NULL, NULL, NULL, status);
+ getToken(state, NULL, NULL, NULL, status);
}
if (U_FAILURE(*status))
{
- array_close(result, status);
+ res_close(result);
return NULL;
}
readToken = TRUE;
}
+ ustr_deinit(&memberComments);
return result;
}
static struct SResource *
-parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
struct SResource *result = NULL;
enum ETokenType token;
char *string;
int32_t value;
UBool readToken = FALSE;
- /* added by Jing/GCL */
char *stopstring;
uint32_t len;
struct UString memberComments;
- result = intvector_open(bundle, tag, comment, status);
+ result = intvector_open(state->bundle, tag, comment, status);
if (result == NULL || U_FAILURE(*status))
{
ustr_setlen(&memberComments, 0, status);
/* check for end of array, but don't consume next token unless it really is the end */
- token = peekToken(0, NULL, NULL,&memberComments, status);
+ token = peekToken(state, 0, NULL, NULL,&memberComments, status);
if (token == TOK_CLOSE_BRACE)
{
/* it's the end, consume the close brace */
- getToken(NULL, NULL, NULL, status);
+ getToken(state, NULL, NULL, NULL, status);
if (!readToken) {
warning(startline, "Encountered empty int vector");
}
+ ustr_deinit(&memberComments);
return result;
}
- string = getInvariantString(NULL, NULL, status);
+ string = getInvariantString(state, NULL, NULL, status);
if (U_FAILURE(*status))
{
- intvector_close(result, status);
+ res_close(result);
return NULL;
}
- /* Commented by Jing/GCL */
- /*value = uprv_strtol(string, NULL, 10);
- intvector_add(result, value, status);
-
- uprv_free(string);
-
- token = peekToken(0, NULL, NULL, status);*/
- /* The following is added by Jing/GCL to handle illegal char in the Intvector */
+ /* For handling illegal char in the Intvector */
value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
len=(uint32_t)(stopstring-string);
{
intvector_add(result, value, status);
uprv_free(string);
- token = peekToken(0, NULL, NULL, NULL, status);
+ token = peekToken(state, 0, NULL, NULL, NULL, status);
}
else
{
uprv_free(string);
*status=U_INVALID_CHAR_FOUND;
}
- /* The above is added by Jing/GCL */
if (U_FAILURE(*status))
{
- intvector_close(result, status);
+ res_close(result);
return NULL;
}
consecutive entries) so that a missing comma on the last entry isn't an error */
if (token == TOK_COMMA)
{
- getToken(NULL, NULL, NULL, status);
+ getToken(state, NULL, NULL, NULL, status);
}
readToken = TRUE;
}
}
static struct SResource *
-parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
struct SResource *result = NULL;
uint8_t *value;
uint32_t count;
uint32_t i;
uint32_t line;
- /* added by Jing/GCL */
char *stopstring;
uint32_t len;
- string = getInvariantString(&line, NULL, status);
+ string = getInvariantString(state, &line, NULL, status);
if (string == NULL || U_FAILURE(*status))
{
return NULL;
}
- expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+ expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
if (U_FAILURE(*status))
{
}
}
- result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status);
+ result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
uprv_free(value);
}
}
else
{
- result = bin_open(bundle, tag, 0, NULL, "",comment,status);
+ result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
warning(startline, "Encountered empty binary tag");
}
uprv_free(string);
}
static struct SResource *
-parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
struct SResource *result = NULL;
int32_t value;
char *string;
- /* added by Jing/GCL */
char *stopstring;
uint32_t len;
- string = getInvariantString(NULL, NULL, status);
+ string = getInvariantString(state, NULL, NULL, status);
if (string == NULL || U_FAILURE(*status))
{
return NULL;
}
- expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+ expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
if (U_FAILURE(*status))
{
warning(startline, "Encountered empty integer. Default value is 0.");
}
- /* commented by Jing/GCL */
- /* value = uprv_strtol(string, NULL, 10);*/
- /* result = int_open(bundle, tag, value, status);*/
- /* The following is added by Jing/GCL*/
- /* to make integer support hexdecimal, octal digit and decimal*/
- /* to handle illegal char in the integer*/
+ /* Allow integers to be written in hexadecimal, octal or decimal */
+ /* and handle illegal char in the integer*/
value = uprv_strtoul(string, &stopstring, 0);
len=(uint32_t)(stopstring-string);
if(len==uprv_strlen(string))
{
- result = int_open(bundle, tag, value, comment, status);
+ result = int_open(state->bundle, tag, value, comment, status);
}
else
{
}
static struct SResource *
-parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
struct SResource *result;
FileStream *file;
uint32_t line;
char *fullname = NULL;
int32_t numRead = 0;
- filename = getInvariantString(&line, NULL, status);
+ filename = getInvariantString(state, &line, NULL, status);
if (U_FAILURE(*status))
{
return NULL;
}
- expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+ expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
if (U_FAILURE(*status))
{
}
/* Open the input file for reading */
- if (inputdir == NULL)
+ if (state->inputdir == NULL)
{
+#if 1
+ /*
+ * Always save the file name, even if there's
+ * no input directory specified. MIGHT BREAK SOMETHING
+ */
+ int32_t filenameLength = uprv_strlen(filename);
+
+ fullname = (char *) uprv_malloc(filenameLength + 1);
+ uprv_strcpy(fullname, filename);
+#endif
+
file = T_FileStream_open(filename, "rb");
}
else
int32_t count = (int32_t)uprv_strlen(filename);
- if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
+ if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
{
- fullname = (char *) uprv_malloc(inputdirLength + count + 2);
+ fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
/* test for NULL */
if(fullname == NULL)
return NULL;
}
- uprv_strcpy(fullname, inputdir);
+ uprv_strcpy(fullname, state->inputdir);
- fullname[inputdirLength] = U_FILE_SEP_CHAR;
- fullname[inputdirLength + 1] = '\0';
+ fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
+ fullname[state->inputdirLength + 1] = '\0';
uprv_strcat(fullname, filename);
}
else
{
- fullname = (char *) uprv_malloc(inputdirLength + count + 1);
+ fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
/* test for NULL */
if(fullname == NULL)
return NULL;
}
- uprv_strcpy(fullname, inputdir);
+ uprv_strcpy(fullname, state->inputdir);
uprv_strcat(fullname, filename);
}
numRead = T_FileStream_read (file, data, len);
T_FileStream_close (file);
- result = bin_open(bundle, tag, len, data, fullname, comment, status);
+ result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
uprv_free(data);
uprv_free(filename);
}
static struct SResource *
-parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
struct SResource *result;
int32_t len=0;
const char* cp = NULL;
const UChar* uBuffer = NULL;
- filename = getInvariantString(&line, NULL, status);
+ filename = getInvariantString(state, &line, NULL, status);
count = (int32_t)uprv_strlen(filename);
if (U_FAILURE(*status))
return NULL;
}
- expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
+ expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
if (U_FAILURE(*status))
{
printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- fullname = (char *) uprv_malloc(inputdirLength + count + 2);
+ fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
/* test for NULL */
if(fullname == NULL)
{
return NULL;
}
- if(inputdir!=NULL){
- if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
+ if(state->inputdir!=NULL){
+ if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
{
- uprv_strcpy(fullname, inputdir);
+ uprv_strcpy(fullname, state->inputdir);
- fullname[inputdirLength] = U_FILE_SEP_CHAR;
- fullname[inputdirLength + 1] = '\0';
+ fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
+ fullname[state->inputdirLength + 1] = '\0';
uprv_strcat(fullname, filename);
}
else
{
- uprv_strcpy(fullname, inputdir);
+ uprv_strcpy(fullname, state->inputdir);
uprv_strcat(fullname, filename);
}
}else{
}
uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
- result = string_open(bundle, tag, uBuffer, len, comment, status);
+ result = string_open(state->bundle, tag, uBuffer, len, comment, status);
uprv_free(pTarget);
return result;
}
+
+
+
+/* UChar forms of the resource type keywords that may follow ':' in the input.
+ * Every U_STRING_DECL below has a matching U_STRING_INIT in initParser() that
+ * repeats the same literal and length; the length argument is the number of
+ * characters in the literal, so the two lists must be edited together. */
+U_STRING_DECL(k_type_string, "string", 6);
+U_STRING_DECL(k_type_binary, "binary", 6);
+U_STRING_DECL(k_type_bin, "bin", 3); /* short alias, mapped to RT_BINARY in parseResourceType() */
+U_STRING_DECL(k_type_table, "table", 5);
+U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
+U_STRING_DECL(k_type_int, "int", 3); /* short alias, mapped to RT_INTEGER in parseResourceType() */
+U_STRING_DECL(k_type_integer, "integer", 7);
+U_STRING_DECL(k_type_array, "array", 5);
+U_STRING_DECL(k_type_alias, "alias", 5);
+U_STRING_DECL(k_type_intvector, "intvector", 9);
+U_STRING_DECL(k_type_import, "import", 6);
+U_STRING_DECL(k_type_include, "include", 7);
+U_STRING_DECL(k_type_reserved, "reserved", 8);
+
+/* Various non-standard processing plugins that create one or more special resources. */
+U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
+U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
+U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
+U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
+/* Resource types known to the parser.
+ * The enumerator values are used directly as indices into gResourceTypes[],
+ * so the order here must match the row order of that table.
+ * parseResourceType() scans the values strictly between RT_UNKNOWN and
+ * RT_RESERVED, so new types must be inserted between those two. */
+typedef enum EResourceType
+{
+ RT_UNKNOWN, /* type not (yet) determined; also returned when reading the type token fails */
+ RT_STRING,
+ RT_BINARY,
+ RT_TABLE,
+ RT_TABLE_NO_FALLBACK, /* accepted only for the top-level bundle; parseResource() rejects it */
+ RT_INTEGER,
+ RT_ARRAY,
+ RT_ALIAS,
+ RT_INTVECTOR,
+ RT_IMPORT,
+ RT_INCLUDE,
+ RT_PROCESS_UCA_RULES,
+ RT_PROCESS_COLLATION, /* has no parse function in gResourceTypes[] */
+ RT_PROCESS_TRANSLITERATOR,
+ RT_PROCESS_DEPENDENCY,
+ RT_RESERVED /* end marker: loop bound in parseResourceType(), "no match" result */
+} EResourceType;
+/* Per-type descriptor table, indexed by EResourceType (one row per enumerator,
+ * in the same order). parseResourceType() matches the type keyword against
+ * nameUChars; parseResource() dispatches through parseFunction and reports an
+ * internal error when that pointer is NULL. The first and last rows have a
+ * NULL nameUChars; parseResourceType() never compares against them because
+ * its scan excludes both RT_UNKNOWN and RT_RESERVED. */
+static struct {
+ const char *nameChars; /* invariant-character name, used in diagnostic messages */
+ const UChar *nameUChars; /* keyword as it appears in the input, or NULL if not matchable */
+ ParseResourceFunction *parseFunction; /* parser for this type, or NULL if none */
+} gResourceTypes[] = {
+ {"Unknown", NULL, NULL},
+ {"string", k_type_string, parseString},
+ {"binary", k_type_binary, parseBinary},
+ {"table", k_type_table, parseTable},
+ {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
+ {"integer", k_type_integer, parseInteger},
+ {"array", k_type_array, parseArray},
+ {"alias", k_type_alias, parseAlias},
+ {"intvector", k_type_intvector, parseIntVector},
+ {"import", k_type_import, parseImport},
+ {"include", k_type_include, parseInclude},
+ {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
+ {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
+ {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
+ {"process(dependency)", k_type_plugin_dependency, parseDependency},
+ {"reserved", NULL, NULL}
+};
+/* One-time parser setup.
+ * Runs U_STRING_INIT for every k_type_* keyword declared with U_STRING_DECL
+ * (same literal and length as the declaration) and records the two collation
+ * options in file-level globals.
+ * NOTE(review): presumably this must run before any parsing, since
+ * parseResourceType() compares tokens against the k_type_* strings - confirm
+ * against the caller.
+ *
+ * omitBinaryCollation: stored negated in gMakeBinaryCollation.
+ * omitCollationRules:  stored as-is in gOmitCollationRules. */
+void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
+{
+ U_STRING_INIT(k_type_string, "string", 6);
+ U_STRING_INIT(k_type_binary, "binary", 6);
+ U_STRING_INIT(k_type_bin, "bin", 3);
+ U_STRING_INIT(k_type_table, "table", 5);
+ U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
+ U_STRING_INIT(k_type_int, "int", 3);
+ U_STRING_INIT(k_type_integer, "integer", 7);
+ U_STRING_INIT(k_type_array, "array", 5);
+ U_STRING_INIT(k_type_alias, "alias", 5);
+ U_STRING_INIT(k_type_intvector, "intvector", 9);
+ U_STRING_INIT(k_type_import, "import", 6);
+ U_STRING_INIT(k_type_reserved, "reserved", 8);
+ U_STRING_INIT(k_type_include, "include", 7);
+
+ U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
+ U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
+ U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
+ U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
+
+ gMakeBinaryCollation = !omitBinaryCollation; /* note the inversion: "omit" flag becomes "make" flag */
+ gOmitCollationRules = omitCollationRules;
+}
+
+/* Tell whether a resource type is one of the two table flavours
+ * (plain table or table(nofallback)). */
+static U_INLINE UBool isTable(enum EResourceType type) {
+    switch (type) {
+    case RT_TABLE:
+    case RT_TABLE_NO_FALLBACK:
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+/*
+ * Read the resource type keyword that follows a ':' and map it to an
+ * EResourceType.
+ *
+ * The next token must be a string. It is compared first against the
+ * nameUChars entries of gResourceTypes[] (indices strictly between
+ * RT_UNKNOWN and RT_RESERVED) and then against the short aliases
+ * "int" and "bin".
+ *
+ * state:  parser state, passed through to expect().
+ * status: in/out error code. Set to U_INVALID_FORMAT_ERROR (and an error
+ *         is reported) when the keyword matches no known type.
+ * Returns the matching type; RT_UNKNOWN when the token could not be read;
+ * RT_RESERVED, with *status failed, for an unrecognized keyword.
+ */
+static enum EResourceType
+parseResourceType(ParseState* state, UErrorCode *status)
+{
+    struct UString        *tokenValue;
+    struct UString        comment;
+    enum   EResourceType  result = RT_UNKNOWN;
+    uint32_t              line=0;
+
+    ustr_init(&comment);
+    expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
+
+    if (U_FAILURE(*status))
+    {
+        /* comment was initialized above; release it on every exit path */
+        ustr_deinit(&comment);
+        return RT_UNKNOWN;
+    }
+
+    /* reset to plain success before classifying the keyword */
+    *status = U_ZERO_ERROR;
+
+    /* Search for normal types */
+    result=RT_UNKNOWN;
+    while (++result < RT_RESERVED) {
+        if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
+            break;
+        }
+    }
+    /* Now search for the aliases */
+    if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
+        result = RT_INTEGER;
+    }
+    else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
+        result = RT_BINARY;
+    }
+    else if (result == RT_RESERVED) {
+        /* the scan ran off the end: neither a normal type nor an alias */
+        char tokenBuffer[1024];
+        u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
+        tokenBuffer[sizeof(tokenBuffer) - 1] = 0; /* force termination if the keyword was truncated */
+        *status = U_INVALID_FORMAT_ERROR;
+        error(line, "unknown resource type '%s'", tokenBuffer);
+    }
+
+    /* the comment text is not used by this function; free it before returning */
+    ustr_deinit(&comment);
+    return result;
+}
+
+/* parse a non-top-level resource */
static struct SResource *
-parseResource(char *tag, const struct UString *comment, UErrorCode *status)
+parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
{
enum ETokenType token;
enum EResourceType resType = RT_UNKNOWN;
+ ParseResourceFunction *parseFunction = NULL;
struct UString *tokenValue;
uint32_t startline;
uint32_t line;
- token = getToken(&tokenValue, NULL, &startline, status);
+ token = getToken(state, &tokenValue, NULL, &startline, status);
if(isVerbose()){
printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
return NULL;
case TOK_COLON:
- resType = parseResourceType(status);
- expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
+ resType = parseResourceType(state, status);
+ expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
if (U_FAILURE(*status))
{
{ :/} => array
{ string , => string array
- commented by Jing/GCL
{ string { => table
- added by Jing/GCL
-
{ string :/{ => table
{ string } => string
*/
- token = peekToken(0, NULL, &line, NULL,status);
+ token = peekToken(state, 0, NULL, &line, NULL,status);
if (U_FAILURE(*status))
{
return NULL;
}
- /* Commented by Jing/GCL */
- /* if (token == TOK_OPEN_BRACE || token == TOK_COLON )*/
if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
{
resType = RT_ARRAY;
}
else if (token == TOK_STRING)
{
- token = peekToken(1, NULL, &line, NULL, status);
+ token = peekToken(state, 1, NULL, &line, NULL, status);
if (U_FAILURE(*status))
{
case TOK_COMMA: resType = RT_ARRAY; break;
case TOK_OPEN_BRACE: resType = RT_TABLE; break;
case TOK_CLOSE_BRACE: resType = RT_STRING; break;
- /* added by Jing/GCL to make table work when :table is omitted */
case TOK_COLON: resType = RT_TABLE; break;
default:
*status = U_INVALID_FORMAT_ERROR;
}
/* printf("Type guessed as %s\n", resourceNames[resType]); */
+ } else if(resType == RT_TABLE_NO_FALLBACK) {
+ *status = U_INVALID_FORMAT_ERROR;
+ error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
+ return NULL;
}
/* We should now know what we need to parse next, so call the appropriate parser
function and return. */
- switch (resType)
- {
- case RT_STRING: return parseString (tag, startline, comment, status);
- case RT_TABLE: return parseTable (tag, startline, comment, status);
- case RT_ARRAY: return parseArray (tag, startline, comment, status);
- case RT_ALIAS: return parseAlias (tag, startline, comment, status);
- case RT_BINARY: return parseBinary (tag, startline, comment, status);
- case RT_INTEGER: return parseInteger (tag, startline, comment, status);
- case RT_IMPORT: return parseImport (tag, startline, comment, status);
- case RT_INCLUDE: return parseInclude (tag, startline, comment, status);
- case RT_INTVECTOR: return parseIntVector (tag, startline, comment, status);
-
- default:
+ parseFunction = gResourceTypes[resType].parseFunction;
+ if (parseFunction != NULL) {
+ return parseFunction(state, tag, startline, comment, status);
+ }
+ else {
*status = U_INTERNAL_PROGRAM_ERROR;
- error(startline, "internal error: unknown resource type found and not handled");
+ error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
}
return NULL;
}
+/* parse the top-level resource */
struct SRBRoot *
-parse(UCHARBUF *buf, const char *currentInputDir, UErrorCode *status)
+parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
{
struct UString *tokenValue;
struct UString comment;
uint32_t line;
- /* added by Jing/GCL */
enum EResourceType bundleType;
enum ETokenType token;
+ ParseState state;
+ uint32_t i;
+ int encLength;
+ char* enc;
+ for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
+ {
+ ustr_init(&state.lookahead[i].value);
+ ustr_init(&state.lookahead[i].comment);
+ }
- initLookahead(buf, status);
+ initLookahead(&state, buf, status);
- inputdir = currentInputDir;
- inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0;
+ state.inputdir = inputDir;
+ state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
+ state.outputdir = outputDir;
+ state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
ustr_init(&comment);
- expect(TOK_STRING, &tokenValue, &comment, NULL, status);
+ expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
- bundle = bundle_open(&comment, status);
+ state.bundle = bundle_open(&comment, FALSE, status);
- if (bundle == NULL || U_FAILURE(*status))
+ if (state.bundle == NULL || U_FAILURE(*status))
{
return NULL;
}
- bundle_setlocale(bundle, tokenValue->fChars, status);
- /* Commented by Jing/GCL */
- /* expect(TOK_OPEN_BRACE, NULL, &line, status); */
- /* The following code is to make Empty bundle work no matter with :table specifer or not */
- token = getToken(NULL, NULL, &line, status);
+ bundle_setlocale(state.bundle, tokenValue->fChars, status);
- if(token==TOK_COLON)
- {
+ /* The following code makes an empty bundle work whether or not the :table specifier is present */
+ token = getToken(&state, NULL, NULL, &line, status);
+ if(token==TOK_COLON) {
*status=U_ZERO_ERROR;
- }
- else
- {
- *status=U_PARSE_ERROR;
- }
-
- if(U_SUCCESS(*status)){
+ bundleType=parseResourceType(&state, status);
- bundleType=parseResourceType(status);
-
- if(bundleType==RT_TABLE)
+ if(isTable(bundleType))
{
- expect(TOK_OPEN_BRACE, NULL, NULL, &line, status);
+ expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
}
else
{
*status=U_PARSE_ERROR;
+ /* printf("asdsdweqdasdad\n"); */
+
error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
}
}
else
{
+ /* not a colon */
if(token==TOK_OPEN_BRACE)
{
*status=U_ZERO_ERROR;
+ bundleType=RT_TABLE;
}
else
{
+ /* neither colon nor open brace */
+ *status=U_PARSE_ERROR;
+ bundleType=RT_UNKNOWN;
error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
}
}
- /* The above is added by Jing/GCL */
if (U_FAILURE(*status))
{
- bundle_close(bundle, status);
+ bundle_close(state.bundle, status);
return NULL;
}
- realParseTable(bundle->fRoot, NULL, line, status);
+ if(bundleType==RT_TABLE_NO_FALLBACK) {
+ /*
+ * Parse a top-level table with the table(nofallback) declaration.
+ * This is the same as a regular table, but also sets the
+ * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
+ */
+ state.bundle->noFallback=TRUE;
+ }
+ /* top-level tables need not handle special table names like "collations" */
+ realParseTable(&state, state.bundle->fRoot, NULL, line, status);
+ if(dependencyArray!=NULL){
+ table_add(state.bundle->fRoot, dependencyArray, 0, status);
+ dependencyArray = NULL;
+ }
if (U_FAILURE(*status))
{
- bundle_close(bundle, status);
+ bundle_close(state.bundle, status);
+ res_close(dependencyArray);
return NULL;
}
- if (getToken(NULL, NULL, &line, status) != TOK_EOF)
+ if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
{
warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
if(isStrict()){
}
}
- return bundle;
+ cleanupLookahead(&state);
+ ustr_deinit(&comment);
+ return state.bundle;
}