2 *******************************************************************************
4 * Copyright (C) 1998-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
// Constants used by the parser: the lookahead depth and the bracket code points.
// MAX_LOOKAHEAD sizes the lookahead array (MAX_LOOKAHEAD + 1 slots) and bounds peekToken.
38 /* Number of tokens to read ahead of the current stream position */
39 #define MAX_LOOKAHEAD 3
// STARTCOMMAND and OPENSQBRACKET are both 0x005B; ENDCOMMAND and CLOSESQBRACKET are both 0x005D.
49 #define STARTCOMMAND 0x005B
50 #define ENDCOMMAND 0x005D
51 #define OPENSQBRACKET 0x005B
52 #define CLOSESQBRACKET 0x005D
// Presumably the per-token comment field that is accessed as lookahead[i].comment
// elsewhere in this file -- confirm against the enclosing declaration.
58 struct UString comment
;
// Display names for token types, sized by TOK_TOKEN_COUNT. expect() and the
// table parsers index this array with a token type when formatting error messages.
62 /* keep in sync with token defines in read.h */
63 const char *tokenNames
[TOK_TOKEN_COUNT
] =
65 "string", /* A string token, such as "MonthNames" */
66 "'{'", /* An opening brace character */
67 "'}'", /* A closing brace character */
71 "<end of file>", /* End of the file has been reached successfully */
75 /* Just to store "TRUE" */
76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
// Parser state fields. lookahead holds MAX_LOOKAHEAD + 1 slots and
// lookaheadPosition is the index of the current slot (see getToken/peekToken).
79 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
80 uint32_t lookaheadPosition
;
// Bundle pointer passed to the *_open resource constructors.
82 struct SRBRoot
*bundle
;
// Directory settings; the *Length fields are used to test the last character
// of the corresponding directory string for U_FILE_SEP_CHAR.
84 uint32_t inputdirLength
;
85 const char *outputdir
;
86 uint32_t outputdirLength
;
// Checked by addCollation before building the collation binary for "Sequence".
87 UBool makeBinaryCollation
;
// File-scope switch, FALSE by default; tested in parseUCARules and addCollation.
90 static UBool gOmitCollationRules
= FALSE
;
// Function type matching the per-resource parsers in this file
// (state, tag, startline, comment, status) returning a struct SResource pointer.
92 typedef struct SResource
*
93 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
// Forward declaration: parseResource is called by the table, array and
// collation parsers before its definition appears.
95 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
// initLookahead: resets lookaheadPosition to 0 and fills slots 0 .. MAX_LOOKAHEAD - 1
// by calling getNextToken on state->buffer once per slot.
97 /* The nature of the lookahead buffer:
98 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
99 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
100 When getToken is called, the current pointer is moved to the next slot and the
101 old slot is filled with the next token from the reader by calling getNextToken.
102 The token values are stored in the slot, which means that token values don't
103 survive a call to getToken, ie.
107 getToken(&value, NULL, status);
108 getToken(NULL, NULL, status); bad - value is now a different string
111 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
// Function-local static flag, zero on first entry.
113 static uint32_t initTypeStrings
= 0;
116 if (!initTypeStrings
)
121 state
->lookaheadPosition
= 0;
// The bound is exclusive, so the last slot (index MAX_LOOKAHEAD) is not filled here.
126 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
128 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
129 if (U_FAILURE(*status
))
135 *status
= U_ZERO_ERROR
;
// cleanupLookahead: calls ustr_deinit on the value and comment strings of every
// lookahead slot. The loop bound is inclusive, so all MAX_LOOKAHEAD + 1 slots are visited.
139 cleanupLookahead(ParseState
* state
)
142 for (i
= 0; i
<= MAX_LOOKAHEAD
; i
++)
144 ustr_deinit(&state
->lookahead
[i
].value
);
145 ustr_deinit(&state
->lookahead
[i
].comment
);
// getToken: reads the token in the current lookahead slot, advances
// lookaheadPosition by one (modulo MAX_LOOKAHEAD + 1) and refills one slot
// from getNextToken.
//   tokenValue - when non-NULL, receives a pointer to the slot's value string
//                (a pointer into the ring, not a copy)
//   comment    - destination for a copy of the slot's comment
//   linenumber - when non-NULL, receives the slot's line number
150 static enum ETokenType
151 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
153 enum ETokenType result
;
// Type of the token currently at lookaheadPosition.
156 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
158 if (tokenValue
!= NULL
)
160 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
163 if (linenumber
!= NULL
)
165 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
// Copies (rather than aliases) the current slot's comment into the caller's string.
170 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
// i is the slot MAX_LOOKAHEAD positions after the current one in the ring;
// it is computed before lookaheadPosition is advanced, then cleared and refilled.
173 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
174 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
175 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
176 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
177 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
179 /* printf("getToken, returning %s\n", tokenNames[result]); */
// peekToken: returns the type of the token lookaheadCount slots after the
// current one without changing lookaheadPosition.
//   lookaheadCount - offset from the current slot; values >= MAX_LOOKAHEAD
//                    set *status to U_INTERNAL_PROGRAM_ERROR
//   tokenValue     - when non-NULL, receives a pointer to the peeked slot's value
//   linenumber     - when non-NULL, receives the peeked slot's line number
//   comment        - destination for a copied comment (see the note below)
184 static enum ETokenType
185 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
// Ring index of the peeked slot.
187 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
189 if (U_FAILURE(*status
))
194 if (lookaheadCount
>= MAX_LOOKAHEAD
)
196 *status
= U_INTERNAL_PROGRAM_ERROR
;
200 if (tokenValue
!= NULL
)
202 *tokenValue
= &state
->lookahead
[i
].value
;
205 if (linenumber
!= NULL
)
207 *linenumber
= state
->lookahead
[i
].line
;
// NOTE(review): the comment is copied from the slot at lookaheadPosition, while
// value, line and type are taken from slot i. The two slots differ whenever
// lookaheadCount > 0 -- confirm whether slot i was intended here.
211 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
214 return state
->lookahead
[i
].type
;
// expect: consumes one token through getToken and compares its type with
// expectedToken. On a mismatch *status becomes U_INVALID_FORMAT_ERROR and the
// expected and actual token names are reported through error().
// tokenValue and comment are forwarded to getToken unchanged.
218 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
// The line number is always fetched into the local 'line' so that it is
// available for the error message even when linenumber is NULL.
222 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
224 if (linenumber
!= NULL
)
229 if (U_FAILURE(*status
))
234 if (token
!= expectedToken
)
236 *status
= U_INVALID_FORMAT_ERROR
;
237 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
241 *status
= U_ZERO_ERROR
;
// getInvariantString: reads a TOK_STRING token and converts it to a char string
// held in a buffer obtained from uprv_malloc (count + 1 bytes, where count is
// the u_strlen of the token text).
// Sets U_INVALID_FORMAT_ERROR when uprv_isInvariantUString rejects the text.
// Presumably the caller is responsible for releasing the returned buffer -- confirm.
245 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
247 struct UString
*tokenValue
;
251 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
253 if (U_FAILURE(*status
))
258 count
= u_strlen(tokenValue
->fChars
);
259 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
260 *status
= U_INVALID_FORMAT_ERROR
;
// NOTE(review): 'line' is dereferenced here, but parseIntVector calls this
// function with a NULL line argument -- confirm that path cannot reach this call.
261 error(*line
, "invariant characters required for table keys, binary data, etc.");
265 result
= static_cast<char *>(uprv_malloc(count
+1));
269 *status
= U_MEMORY_ALLOCATION_ERROR
;
// count + 1 so that the terminating NUL is converted as well.
273 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
// parseUCARules: reads a file-name token, builds a path from state->inputdir
// and that name, opens the file with ucbuf_open and copies its characters into
// a UChar buffer, which is then passed to string_open under the given tag.
// The comment parameter is unnamed and unused.
277 static struct SResource
*
278 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
280 struct SResource
*result
= NULL
;
281 struct UString
*tokenValue
;
// NOTE(review): in the code shown, 'file' is initialised to NULL and is only
// ever passed to T_FileStream_close -- confirm whether it is still needed.
282 FileStream
*file
= NULL
;
// NOTE(review): fixed-size buffers; the u_UCharsToChars and uprv_strcat calls
// below show no length check against 128 / 256 -- confirm inputs are bounded.
283 char filename
[256] = { '\0' };
284 char cs
[128] = { '\0' };
286 UBool quoted
= FALSE
;
287 UCHARBUF
*ucbuf
=NULL
;
289 const char* cp
= NULL
;
290 UChar
*pTarget
= NULL
;
291 UChar
*target
= NULL
;
292 UChar
*targetLimit
= NULL
;
295 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
298 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
301 if (U_FAILURE(*status
))
305 /* make the filename including the directory */
306 if (state
->inputdir
!= NULL
)
308 uprv_strcat(filename
, state
->inputdir
);
// Append a separator only when inputdir does not already end with one.
310 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
312 uprv_strcat(filename
, U_FILE_SEP_STRING
);
// Converts exactly fLength characters; cs was zero-initialised above.
316 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
318 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
320 if (U_FAILURE(*status
))
324 uprv_strcat(filename
, cs
);
326 if(gOmitCollationRules
) {
330 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
332 if (U_FAILURE(*status
)) {
333 error(line
, "An error occured while opening the input file %s\n", filename
);
337 /* We allocate more space than actually required
338 * since the actual size needed for storing UChars
339 * is not known in UTF-8 byte stream
341 size
= ucbuf_size(ucbuf
) + 1;
// NOTE(review): the uprv_malloc result is handed to uprv_memset with no
// NULL check visible in between -- confirm.
342 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
343 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
345 targetLimit
= pTarget
+size
;
347 /* read the rules into the buffer */
348 while (target
< targetLimit
)
350 c
= ucbuf_getc(ucbuf
, status
);
// Flips the in-quotes flag.
352 quoted
= (UBool
)!quoted
;
354 /* weiv (06/26/2002): adding the following:
355 * - preserving spaces in commands [...]
356 * - # comments until the end of line
358 if (c
== STARTCOMMAND
&& !quoted
)
361 * closing bracket will be handled by the
362 * append at the end of the loop
// NOTE(review): this inner loop appends until ENDCOMMAND is read and shows no
// test against targetLimit or end of input -- confirm it cannot overrun.
364 while(c
!= ENDCOMMAND
) {
365 U_APPEND_CHAR32_ONLY(c
, target
);
366 c
= ucbuf_getc(ucbuf
, status
);
369 else if (c
== HASH
&& !quoted
) {
// Skips characters up to the next CR or LF.
371 while(c
!= CR
&& c
!= LF
) {
372 c
= ucbuf_getc(ucbuf
, status
);
376 else if (c
== ESCAPE
)
378 c
= unescape(ucbuf
, status
);
380 if (c
== (UChar32
)U_ERR
)
383 T_FileStream_close(file
);
387 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
389 /* ignore spaces carriage returns
390 * and line feed unless in the form \uXXXX
395 /* Append UChar * after dissembling if c > 0xffff*/
396 if (c
!= (UChar32
)U_EOF
)
398 U_APPEND_CHAR32_ONLY(c
, target
);
406 /* terminate the string */
407 if(target
< targetLimit
){
// The string length passed is the number of UChars written so far.
411 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
416 T_FileStream_close(file
);
// parseTransliterator: reads a file-name token, builds a path from
// state->inputdir and that name, opens the file with ucbuf_open, passes its
// buffer through utrans_stripRules (when transliteration is configured in) and
// hands the result to string_open under the given tag.
// The comment parameter is unnamed and unused.
421 static struct SResource
*
422 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
424 struct SResource
*result
= NULL
;
425 struct UString
*tokenValue
;
// NOTE(review): 'file' stays NULL in the code shown and is only passed to
// T_FileStream_close -- confirm whether it is still needed.
426 FileStream
*file
= NULL
;
// NOTE(review): fixed-size buffers; no length check against 128 / 256 is
// visible before the conversion and strcat calls below -- confirm.
427 char filename
[256] = { '\0' };
428 char cs
[128] = { '\0' };
430 UCHARBUF
*ucbuf
=NULL
;
431 const char* cp
= NULL
;
432 UChar
*pTarget
= NULL
;
433 const UChar
*pSource
= NULL
;
436 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
439 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
442 if (U_FAILURE(*status
))
446 /* make the filename including the directory */
447 if (state
->inputdir
!= NULL
)
449 uprv_strcat(filename
, state
->inputdir
);
// Append a separator only when inputdir does not already end with one.
451 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
453 uprv_strcat(filename
, U_FILE_SEP_STRING
);
457 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
459 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
461 if (U_FAILURE(*status
))
465 uprv_strcat(filename
, cs
);
468 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
470 if (U_FAILURE(*status
)) {
471 error(line
, "An error occured while opening the input file %s\n", filename
);
475 /* We allocate more space than actually required
476 * since the actual size needed for storing UChars
477 * is not known in UTF-8 byte stream
// ucbuf_getBuffer also stores the buffer length into 'size'.
479 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
// NOTE(review): size + 1 UChars are allocated but only 'size' UChars are
// cleared below, and no NULL check on pTarget is visible -- confirm.
480 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
481 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
483 #if !UCONFIG_NO_TRANSLITERATION
// 'size' is replaced by the value returned from utrans_stripRules.
484 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
487 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
489 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
493 T_FileStream_close(file
);
// File-scope array resource that collects every dependency string; it is
// created on first use inside parseDependency with the key "%%DEPENDENCY".
497 static struct SResource
* dependencyArray
= NULL
;
// parseDependency: reads a file-name token, checks that the file exists under
// state->outputdir, and creates two string resources from the token text:
// 'result' (keyed by tag) and 'elem' (no key), the latter being appended to
// dependencyArray. Finally expects the closing brace.
499 static struct SResource
*
500 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
502 struct SResource
*result
= NULL
;
503 struct SResource
*elem
= NULL
;
504 struct UString
*tokenValue
;
// NOTE(review): fixed-size buffers; no length check against 128 / 256 is
// visible before the conversion and strcat calls below -- confirm.
506 char filename
[256] = { '\0' };
507 char cs
[128] = { '\0' };
509 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
512 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
515 if (U_FAILURE(*status
))
519 /* make the filename including the directory */
520 if (state
->outputdir
!= NULL
)
522 uprv_strcat(filename
, state
->outputdir
);
// Append a separator only when outputdir does not already end with one.
524 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
526 uprv_strcat(filename
, U_FILE_SEP_STRING
);
530 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
532 if (U_FAILURE(*status
))
536 uprv_strcat(filename
, cs
);
// A missing file is reported with the same text through error() or warning().
537 if(!T_FileStream_file_exists(filename
)){
539 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
541 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
544 if(dependencyArray
==NULL
){
545 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
548 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
550 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
552 array_add(dependencyArray
, elem
, status
);
554 if (U_FAILURE(*status
))
558 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
// parseString: reads a TOK_STRING token, creates a string resource from it via
// string_open (keyed by tag, carrying the caller's comment) and then expects
// the closing brace.
561 static struct SResource
*
562 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
564 struct UString
*tokenValue
;
565 struct SResource
*result
= NULL
;
567 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
569 return parseUCARules(tag, startline, status);
572 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
574 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
576 if (U_SUCCESS(*status
))
578 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
579 doesn't survive expect either) */
581 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
// The closing brace is consumed only after the resource was created successfully.
582 if(U_SUCCESS(*status
) && result
) {
583 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
585 if (U_FAILURE(*status
))
// parseAlias: reads a TOK_STRING token, creates an alias resource from it via
// alias_open (keyed by tag, carrying the caller's comment) and then expects
// the closing brace.
596 static struct SResource
*
597 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
599 struct UString
*tokenValue
;
600 struct SResource
*result
= NULL
;
602 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
605 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
608 if (U_SUCCESS(*status
))
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
613 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
615 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
617 if (U_FAILURE(*status
))
// Directory strings read through a GenrbData pointer in importFromDataFile and
// assigned from state->inputdir / state->outputdir in addCollation.
628 const char* inputDir
;
629 const char* outputDir
;
// resLookup: scans the table stored in res->u.fTable for an entry whose key
// string equals 'key'. Entries are visited from fFirst along fNext; each key
// string is located at list->fRoot->fKeys + current->fKey.
// Presumably returns the matching entry, or NULL when none matches -- confirm.
632 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
633 struct SResource
*current
= NULL
;
634 struct SResTable
*list
;
// res_none() is tested for before the table is touched.
635 if (res
== res_none()) {
639 list
= &(res
->u
.fTable
);
641 current
= list
->fFirst
;
642 while (current
!= NULL
) {
643 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
646 current
= current
->fNext
;
// importFromDataFile: callback passed to ucol_openRulesForImport (see addCollation).
// Builds "<locale>.txt", resolves it against genrbdata->inputDir, parses that
// file and looks up collations -> <type> -> "Sequence" in the parsed data.
//   context - cast to GenrbData* (provides inputDir / outputDir)
//   locale  - used as the base of the file name
//   type    - key looked up inside the "collations" table
//   pLength - receives the length of the rule string when "Sequence" is found
//   status  - error code; must be non-NULL and not already a failure
651 static const UChar
* importFromDataFile(void* context
, const char* locale
, const char* type
, int32_t* pLength
, UErrorCode
* status
){
652 struct SRBRoot
*data
= NULL
;
653 UCHARBUF
*ucbuf
= NULL
;
654 GenrbData
* genrbdata
= (GenrbData
*) context
;
655 int localeLength
= strlen(locale
);
// Room for the locale text plus ".txt" and the terminating NUL.
// NOTE(review): the uprv_malloc result is written to below with no NULL check
// visible, and no uprv_free(filename) appears among the cleanup calls -- confirm.
656 char* filename
= (char*)uprv_malloc(localeLength
+5);
657 char *inputDirBuf
= NULL
;
658 char *openFileName
= NULL
;
660 UChar
* urules
= NULL
;
661 int32_t urulesLength
= 0;
665 struct SResource
* root
;
666 struct SResource
* collations
;
667 struct SResource
* collation
;
668 struct SResource
* sequence
;
670 memcpy(filename
, locale
, localeLength
);
// Scans the copied locale text for '-' characters.
671 for(i
= 0; i
< localeLength
; i
++){
672 if(filename
[i
] == '-'){
// Appends the ".txt" suffix and the terminator by hand.
676 filename
[localeLength
] = '.';
677 filename
[localeLength
+1] = 't';
678 filename
[localeLength
+2] = 'x';
679 filename
[localeLength
+3] = 't';
680 filename
[localeLength
+4] = 0;
683 if (status
==NULL
|| U_FAILURE(*status
)) {
687 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
690 filelen
= (int32_t)uprv_strlen(filename
);
692 if(genrbdata
->inputDir
== NULL
) {
693 const char *filenameBegin
= uprv_strrchr(filename
, U_FILE_SEP_CHAR
);
// NOTE(review): dirlen is read here before any assignment to it that is visible
// in this function -- confirm it is initialised where it is declared.
694 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
695 openFileName
[0] = '\0';
696 if (filenameBegin
!= NULL
) {
698 * When a filename ../../../data/root.txt is specified,
699 * we presume that the input directory is ../../../data
700 * This is very important when the resource file includes
701 * another file, like UCARules.txt or thaidict.brk.
// Length of the directory prefix, including the separator position.
703 int32_t filenameSize
= (int32_t)(filenameBegin
- filename
+ 1);
704 inputDirBuf
= uprv_strncpy((char *)uprv_malloc(filenameSize
), filename
, filenameSize
);
707 if(inputDirBuf
== NULL
) {
708 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Overwrites the last copied character (the separator position) with the terminator.
712 inputDirBuf
[filenameSize
- 1] = 0;
713 genrbdata
->inputDir
= inputDirBuf
;
714 dirlen
= (int32_t)uprv_strlen(genrbdata
->inputDir
);
717 dirlen
= (int32_t)uprv_strlen(genrbdata
->inputDir
);
719 if(genrbdata
->inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
// Two extra bytes: one for the separator added below and one for the terminator.
720 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
723 if(openFileName
== NULL
) {
724 *status
= U_MEMORY_ALLOCATION_ERROR
;
728 openFileName
[0] = '\0';
730 * append the input dir to openFileName if the first char in
731 * filename is not file seperation char and the last char input directory is not '.'.
732 * This is to support :
733 * genrb -s. /home/icu/data
735 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir
740 if( (filename
[0] != U_FILE_SEP_CHAR
) && (genrbdata
->inputDir
[dirlen
-1] !='.')){
741 uprv_strcpy(openFileName
, genrbdata
->inputDir
);
742 openFileName
[dirlen
] = U_FILE_SEP_CHAR
;
744 openFileName
[dirlen
+ 1] = '\0';
// inputDir already ends with a separator: only one extra byte is needed.
746 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 1);
749 if(openFileName
== NULL
) {
750 *status
= U_MEMORY_ALLOCATION_ERROR
;
754 uprv_strcpy(openFileName
, genrbdata
->inputDir
);
758 uprv_strcat(openFileName
, filename
);
759 /* printf("%s\n", openFileName); */
// The status is reset immediately before the file is opened.
760 *status
= U_ZERO_ERROR
;
761 ucbuf
= ucbuf_open(openFileName
, &cp
,getShowWarning(),TRUE
, status
);
763 if(*status
== U_FILE_ACCESS_ERROR
) {
765 fprintf(stderr
, "couldn't open file %s\n", openFileName
== NULL
? filename
: openFileName
);
768 if (ucbuf
== NULL
|| U_FAILURE(*status
)) {
769 fprintf(stderr
, "An error occured processing file %s. Error: %s\n", openFileName
== NULL
? filename
: openFileName
,u_errorName(*status
));
773 /* Parse the data into an SRBRoot */
774 data
= parse(ucbuf
, genrbdata
->inputDir
, genrbdata
->outputDir
, FALSE
, status
);
// Lookup chain: root -> "collations" -> <type> -> "Sequence"; each step is
// taken only when the previous lookup returned non-NULL.
777 collations
= resLookup(root
, "collations");
778 if (collations
!= NULL
) {
779 collation
= resLookup(collations
, type
);
780 if (collation
!= NULL
) {
781 sequence
= resLookup(collation
, "Sequence");
782 if (sequence
!= NULL
) {
// urules points at the characters held by the parsed resource (no copy is made here).
783 urules
= sequence
->u
.fString
.fChars
;
784 urulesLength
= sequence
->u
.fString
.fLength
;
785 *pLength
= urulesLength
;
791 if (inputDirBuf
!= NULL
) {
792 uprv_free(inputDirBuf
);
795 if (openFileName
!= NULL
) {
796 uprv_free(openFileName
);
// escape: writes an ASCII rendering of the NUL-terminated UTF-16 string 's'
// into 'buffer'. Code points in 0x20..0x7e are copied as single chars; the
// sprintf branch writes a backslash, the letter u and at least four upper-case
// hex digits.
// NOTE(review): there is no output-size parameter. The callers shown in
// addCollation pass 100-char arrays -- confirm the input cannot overflow them.
806 // Quick-and-dirty escaping function.
807 // Assumes that we are on an ASCII-based platform.
809 escape(const UChar
*s
, char *buffer
) {
810 int32_t length
= u_strlen(s
);
// U16_NEXT reads one code point (combining surrogate pairs) and advances i.
814 U16_NEXT(s
, i
, length
, c
);
818 } else if (0x20 <= c
&& c
<= 0x7e) {
820 *buffer
++ = (char)c
; // assumes ASCII-based platform
// sprintf returns the number of chars written, which advances the output pointer.
822 buffer
+= sprintf(buffer
, "\\u%04X", (int)c
);
// addCollation: parses the members of one collation table and adds them to
// 'result'. Each member is a key token followed by a resource parsed by
// parseResource. Keys handled specially:
//   "Version"        - converted to a UVersionInfo, then added to the table
//   "Override"       - added to the table
//   "%%CollationBin" - discarded
//   "Sequence"       - when state->makeBinaryCollation is set, a collator is
//                      built from the rules and its binary image is added as
//                      "%%CollationBin", plus "%%ReorderCodes" if any exist
827 static struct SResource
*
828 addCollation(ParseState
* state
, struct SResource
*result
, uint32_t startline
, UErrorCode
*status
)
830 struct SResource
*member
= NULL
;
831 struct UString
*tokenValue
;
832 struct UString comment
;
833 enum ETokenType token
;
835 UVersionInfo version
;
838 /* '{' . (name resource)* '}' */
// Default version 0.0.0.0 until a "Version" member is seen.
839 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
844 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
846 if (token
== TOK_CLOSE_BRACE
)
851 if (token
!= TOK_STRING
)
854 *status
= U_INVALID_FORMAT_ERROR
;
856 if (token
== TOK_EOF
)
858 error(startline
, "unterminated table");
862 error(line
, "Unexpected token %s", tokenNames
[token
]);
// Length + 1 so that the terminating NUL is converted into subtag as well.
868 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
870 if (U_FAILURE(*status
))
876 member
= parseResource(state
, subtag
, NULL
, status
);
878 if (U_FAILURE(*status
))
884 if (uprv_strcmp(subtag
, "Version") == 0)
887 int32_t length
= member
->u
.fString
.fLength
;
// Clamp so that the text plus terminator fits in 'ver'.
889 if (length
>= (int32_t) sizeof(ver
))
891 length
= (int32_t) sizeof(ver
) - 1;
894 u_UCharsToChars(member
->u
.fString
.fChars
, ver
, length
+ 1); /* +1 for copying NULL */
895 u_versionFromString(version
, ver
);
897 table_add(result
, member
, line
, status
);
900 else if (uprv_strcmp(subtag
, "Override") == 0)
902 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
903 table_add(result
, member
, line
, status
);
906 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
908 /* discard duplicate %%CollationBin if any*/
910 else if (uprv_strcmp(subtag
, "Sequence") == 0)
912 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
913 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
915 if(state
->makeBinaryCollation
) {
917 /* do the collation elements */
919 uint8_t *data
= NULL
;
920 UCollator
*coll
= NULL
;
// Sized for every script code plus the special reorder codes.
921 int32_t reorderCodes
[USCRIPT_CODE_LIMIT
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
)];
922 int32_t reorderCodeCount
;
923 int32_t reorderCodeIndex
;
924 UParseError parseError
;
// Context handed to importFromDataFile through ucol_openRulesForImport.
926 genrbdata
.inputDir
= state
->inputdir
;
927 genrbdata
.outputDir
= state
->outputdir
;
// A separate status is used for the collator build, independent of *status.
929 UErrorCode intStatus
= U_ZERO_ERROR
;
930 uprv_memset(&parseError
, 0, sizeof(parseError
));
931 coll
= ucol_openRulesForImport(member
->u
.fString
.fChars
, member
->u
.fString
.fLength
,
932 UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,&parseError
, importFromDataFile
, &genrbdata
, &intStatus
);
934 if (U_SUCCESS(intStatus
) && coll
!= NULL
)
// First call with a NULL buffer obtains the required length; intStatus is
// reset before the second call, which fills the allocated buffer.
936 len
= ucol_cloneBinary(coll
, NULL
, 0, &intStatus
);
937 data
= (uint8_t *)uprv_malloc(len
);
938 intStatus
= U_ZERO_ERROR
;
939 len
= ucol_cloneBinary(coll
, data
, len
, &intStatus
);
940 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
942 /* tailoring rules version */
944 /*coll->dataInfo.dataVersion[1] = version[0];*/
945 /* Copy tailoring version. Builder version already */
946 /* set in ucol_openRules */
// NOTE(review): 'data' is dereferenced here, before the data != NULL test
// that follows -- confirm the allocation cannot fail at this point.
947 ((UCATableHeader
*)data
)->version
[1] = version
[0];
948 ((UCATableHeader
*)data
)->version
[2] = version
[1];
949 ((UCATableHeader
*)data
)->version
[3] = version
[2];
951 if (U_SUCCESS(intStatus
) && data
!= NULL
)
953 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", len
, data
, NULL
, NULL
, status
);
954 table_add(result
, collationBin
, line
, status
);
957 reorderCodeCount
= ucol_getReorderCodes(
958 coll
, reorderCodes
, USCRIPT_CODE_LIMIT
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
), &intStatus
);
// The reorder codes are stored as an int vector only when at least one exists.
959 if (U_SUCCESS(intStatus
) && reorderCodeCount
> 0) {
960 struct SResource
*reorderCodeRes
= intvector_open(state
->bundle
, "%%ReorderCodes", NULL
, status
);
961 for (reorderCodeIndex
= 0; reorderCodeIndex
< reorderCodeCount
; reorderCodeIndex
++) {
962 intvector_add(reorderCodeRes
, reorderCodes
[reorderCodeIndex
], status
);
964 table_add(result
, reorderCodeRes
, line
, status
);
969 warning(line
, "could not obtain rules from collator");
971 *status
= U_INVALID_FORMAT_ERROR
;
980 if(intStatus
== U_FILE_ACCESS_ERROR
) {
981 error(startline
, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
// The parse-error context is escaped into fixed 100-char buffers for the message.
985 char preBuffer
[100], postBuffer
[100];
986 escape(parseError
.preContext
, preBuffer
);
987 escape(parseError
.postContext
, postBuffer
);
989 "%%%%CollationBin could not be constructed from CollationElements\n"
990 " check context, check that the FractionalUCA.txt UCA version "
991 "matches the current UCD version\n"
992 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
993 u_errorName(intStatus
),
1005 printf("Not building Collation binary\n");
1009 /* in order to achieve smaller data files, we can direct genrb */
1010 /* to omit collation rules */
1011 if(gOmitCollationRules
) {
1012 bundle_closeString(state
->bundle
, member
);
1014 table_add(result
, member
, line
, status
);
1017 if (U_FAILURE(*status
))
1024 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1025 *status
= U_INTERNAL_PROGRAM_ERROR
;
// parseCollationElements: opens a table for 'tag' and fills it with collation data.
// One path returns addCollation() on the new table directly. The other reads
// keyed members:
//   "default"          - parsed by parseResource and added to the table
//   key followed by {  - a sub-table is opened and filled by addCollation
//   key followed by :  - accepted only when the type keyword is "alias"
//   anything else      - U_INVALID_FORMAT_ERROR
1029 static struct SResource
*
1030 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1032 struct SResource
*result
= NULL
;
1033 struct SResource
*member
= NULL
;
1034 struct SResource
*collationRes
= NULL
;
1035 struct UString
*tokenValue
;
1036 struct UString comment
;
1037 enum ETokenType token
;
1038 char subtag
[1024], typeKeyword
[1024];
1041 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1043 if (result
== NULL
|| U_FAILURE(*status
))
1048 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1051 return addCollation(state
, result
, startline
, status
);
1055 ustr_init(&comment
);
1056 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1058 if (token
== TOK_CLOSE_BRACE
)
1063 if (token
!= TOK_STRING
)
1066 *status
= U_INVALID_FORMAT_ERROR
;
1068 if (token
== TOK_EOF
)
1070 error(startline
, "unterminated table");
1074 error(line
, "Unexpected token %s", tokenNames
[token
]);
// Length + 1 so that the terminating NUL is converted into subtag as well.
1080 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1082 if (U_FAILURE(*status
))
1088 if (uprv_strcmp(subtag
, "default") == 0)
1090 member
= parseResource(state
, subtag
, NULL
, status
);
1092 if (U_FAILURE(*status
))
1098 table_add(result
, member
, line
, status
);
// Look at the next token without consuming it to decide how to proceed.
1102 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1103 /* this probably needs to be refactored or recursively use the parser */
1104 /* first we assume that our collation table won't have the explicit type */
1105 /* then, we cannot handle aliases */
1106 if(token
== TOK_OPEN_BRACE
) {
// Consume the opening brace that was just peeked.
1107 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1108 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1109 collationRes
= addCollation(state
, collationRes
, startline
, status
); /* need to parse the collation data regardless */
// The "unihan" sub-table is added only when gIncludeUnihanColl is set.
1110 if (gIncludeUnihanColl
|| uprv_strcmp(subtag
, "unihan") != 0) {
1111 table_add(result
, collationRes
, startline
, status
);
1113 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1114 /* we could have a table too */
// Peek one token further, past the colon, to read the type keyword.
1115 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1116 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1117 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1118 member
= parseResource(state
, subtag
, NULL
, status
);
1119 if (U_FAILURE(*status
))
1125 table_add(result
, member
, line
, status
);
1128 *status
= U_INVALID_FORMAT_ERROR
;
1133 *status
= U_INVALID_FORMAT_ERROR
;
1138 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1140 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1142 if (U_FAILURE(*status
))
// realParseTable: fills an already opened 'table' with its members. Each member
// is a key token (which must consist of invariant characters) followed by a
// resource parsed by parseResource; the member is then added with table_add.
// Errors are reported through error() with the current line and status name.
1151 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1152 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1153 static struct SResource
*
1154 realParseTable(ParseState
* state
, struct SResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1156 struct SResource
*member
= NULL
;
1157 struct UString
*tokenValue
=NULL
;
1158 struct UString comment
;
1159 enum ETokenType token
;
1162 UBool readToken
= FALSE
;
1164 /* '{' . (name resource)* '}' */
1167 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
// The member comment is initialised before each key token is read and
// released again after the member has been added.
1171 ustr_init(&comment
);
1172 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1174 if (token
== TOK_CLOSE_BRACE
)
1177 warning(startline
, "Encountered empty table");
1182 if (token
!= TOK_STRING
)
1184 *status
= U_INVALID_FORMAT_ERROR
;
1186 if (token
== TOK_EOF
)
1188 error(startline
, "unterminated table");
1192 error(line
, "unexpected token %s", tokenNames
[token
]);
// A length of -1 is passed, i.e. the key is treated as NUL-terminated.
1198 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
1199 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1201 *status
= U_INVALID_FORMAT_ERROR
;
1202 error(line
, "invariant characters required for table keys");
1206 if (U_FAILURE(*status
))
1208 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
1212 member
= parseResource(state
, subtag
, &comment
, status
);
1214 if (member
== NULL
|| U_FAILURE(*status
))
1216 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1220 table_add(table
, member
, line
, status
);
1222 if (U_FAILURE(*status
))
1224 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1228 ustr_deinit(&comment
);
1232 /* A compiler warning will appear if all paths don't contain a return statement. */
1233 /* *status = U_INTERNAL_PROGRAM_ERROR;
// parseTable: entry point for table resources. Tables tagged
// "CollationElements" or "collations" are routed to parseCollationElements
// (with newCollation FALSE and TRUE respectively); every other table is opened
// with table_open and filled by realParseTable.
1237 static struct SResource
*
1238 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1240 struct SResource
*result
;
1242 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1244 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1246 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1248 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1251 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1254 result
= table_open(state
->bundle
, tag
, comment
, status
);
1256 if (result
== NULL
|| U_FAILURE(*status
))
1260 return realParseTable(state
, result
, tag
, startline
, status
);
// parseArray: opens an array resource for 'tag' and appends its members until
// the closing brace. String tokens become string resources directly; any other
// token is handed to parseResource. A comma after a member is optional.
1263 static struct SResource
*
1264 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1266 struct SResource
*result
= NULL
;
1267 struct SResource
*member
= NULL
;
1268 struct UString
*tokenValue
;
1269 struct UString memberComments
;
1270 enum ETokenType token
;
1271 UBool readToken
= FALSE
;
1273 result
= array_open(state
->bundle
, tag
, comment
, status
);
1275 if (result
== NULL
|| U_FAILURE(*status
))
1280 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1283 ustr_init(&memberComments
);
1285 /* '{' . resource [','] '}' */
// The per-member comment buffer is emptied before each member is examined.
1289 ustr_setlen(&memberComments
, 0, status
);
1291 /* check for end of array, but don't consume next token unless it really is the end */
1292 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1295 if (token
== TOK_CLOSE_BRACE
)
// Consume the closing brace that was just peeked.
1297 getToken(state
, NULL
, NULL
, NULL
, status
);
1299 warning(startline
, "Encountered empty array");
1304 if (token
== TOK_EOF
)
1307 *status
= U_INVALID_FORMAT_ERROR
;
1308 error(startline
, "unterminated array");
1312 /* string arrays are a special case */
1313 if (token
== TOK_STRING
)
1315 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
// Array members are created without a key (NULL tag).
1316 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1320 member
= parseResource(state
, NULL
, &memberComments
, status
);
1323 if (member
== NULL
|| U_FAILURE(*status
))
1329 array_add(result
, member
, status
);
1331 if (U_FAILURE(*status
))
1337 /* eat optional comma if present */
1338 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1340 if (token
== TOK_COMMA
)
1342 getToken(state
, NULL
, NULL
, NULL
, status
);
1345 if (U_FAILURE(*status
))
1353 ustr_deinit(&memberComments
);
// parseIntVector: opens an int-vector resource for 'tag' and appends one
// integer per string token until the closing brace. Each token is converted
// with uprv_strtoul using base 0; a token that is not consumed completely
// sets U_INVALID_CHAR_FOUND. A comma after an entry is optional.
1357 static struct SResource
*
1358 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1360 struct SResource
*result
= NULL
;
1361 enum ETokenType token
;
1364 UBool readToken
= FALSE
;
1367 struct UString memberComments
;
1369 result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1371 if (result
== NULL
|| U_FAILURE(*status
))
1377 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1379 ustr_init(&memberComments
);
1380 /* '{' . string [','] '}' */
1383 ustr_setlen(&memberComments
, 0, status
);
1385 /* check for end of array, but don't consume next token unless it really is the end */
1386 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1388 if (token
== TOK_CLOSE_BRACE
)
1390 /* it's the end, consume the close brace */
1391 getToken(state
, NULL
, NULL
, NULL
, status
);
1393 warning(startline
, "Encountered empty int vector");
1395 ustr_deinit(&memberComments
);
// Both the line and comment arguments are passed as NULL here.
1399 string
= getInvariantString(state
, NULL
, NULL
, status
);
1401 if (U_FAILURE(*status
))
1407 /* For handling illegal char in the Intvector */
1408 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
// Number of characters consumed by the conversion.
1409 len
=(uint32_t)(stopstring
-string
);
// The value is accepted only when the whole string was consumed.
1411 if(len
==uprv_strlen(string
))
1413 intvector_add(result
, value
, status
);
1415 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1420 *status
=U_INVALID_CHAR_FOUND
;
1423 if (U_FAILURE(*status
))
1429 /* the comma is optional (even though it is required to prevent the reader from concatenating
1430 consecutive entries) so that a missing comma on the last entry isn't an error */
1431 if (token
== TOK_COMMA
)
1433 getToken(state
, NULL
, NULL
, NULL
, status
);
1439 /* A compiler warning will appear if all paths don't contain a return statement. */
1440 /* intvector_close(result, status);
1441 *status = U_INTERNAL_PROGRAM_ERROR;
// parseBinary: parse a binary resource written as a string of hexadecimal
// digits and build the resource created by bin_open().
//
// Behavior visible in this listing:
//  - the digit string is read with getInvariantString(), which also reports
//    the line number used by the later error message;
//  - the closing brace is then required via expect(TOK_CLOSE_BRACE);
//  - characters are copied two at a time into toConv and converted with
//    uprv_strtoul() in base 16; the result is stored in value[i >> 1];
//  - when a pair is not fully consumed by the conversion, *status is set to
//    U_INVALID_CHAR_FOUND;
//  - another path sets U_INVALID_CHAR_FOUND and reports an invalid binary
//    string at the recorded line;
//  - the last path opens a zero-length binary and warns about an empty tag.
//
// tag        resource key; NULL is printed as (null) in the trace output
// startline  line number used for the trace output and the empty-tag warning
// comment    passed unchanged to bin_open()
// status     in/out ICU error code
//
// NOTE(review): the conditions that select between these paths, the return
// statements and any uprv_free() calls are not visible in this listing.
// NOTE(review): value is allocated with count bytes although the loop writes
// only indices up to (count - 1) >> 1; larger than needed but harmless.
// NOTE(review): confirm that value is released on the invalid-pair path.
1445 static struct SResource
*
1446 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1448 struct SResource
*result
= NULL
;
// Scratch buffer: two hex digits plus the terminating NUL.
1451 char toConv
[3] = {'\0', '\0', '\0'};
1458 string
= getInvariantString(state
, &line
, NULL
, status
);
1460 if (string
== NULL
|| U_FAILURE(*status
))
1465 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1467 if (U_FAILURE(*status
))
1474 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
// count is the number of hex digit characters in the input.
1477 count
= (uint32_t)uprv_strlen(string
);
1480 value
= static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count
));
1485 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Decode one byte from each pair of characters.
1489 for (i
= 0; i
< count
; i
+= 2)
1491 toConv
[0] = string
[i
];
1492 toConv
[1] = string
[i
+ 1];
1494 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
// len is the number of characters of the pair that were valid hex digits.
1495 len
=(uint32_t)(stopstring
-toConv
);
1497 if(len
!=uprv_strlen(toConv
))
1500 *status
=U_INVALID_CHAR_FOUND
;
// i >> 1 is the number of decoded bytes once the loop has finished.
1505 result
= bin_open(state
->bundle
, tag
, (i
>> 1), value
,NULL
, comment
, status
);
1511 *status
= U_INVALID_CHAR_FOUND
;
1513 error(line
, "Encountered invalid binary string");
1519 result
= bin_open(state
->bundle
, tag
, 0, NULL
, "",comment
,status
);
1520 warning(startline
, "Encountered empty binary tag");
// parseInteger: parse an integer resource and build the resource created by
// int_open().
//
// Behavior visible in this listing:
//  - the text is read with getInvariantString() and the closing brace is
//    required via expect(TOK_CLOSE_BRACE);
//  - an empty string produces a warning that the default value is 0;
//  - the text is converted with uprv_strtoul() using base 0 and int_open()
//    is called only when the conversion consumed the whole string;
//  - a later visible statement sets *status to U_INVALID_CHAR_FOUND; result
//    is initialized to NULL and is not assigned on that path.
//
// tag        resource key; NULL is printed as (null) in the trace output
// startline  line number used for the trace output and the warning
// comment    passed unchanged to int_open()
// status     in/out ICU error code
//
// NOTE(review): the return statements, the release of string and the
// declarations of string, value, stopstring and len are not visible here.
1527 static struct SResource
*
1528 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1530 struct SResource
*result
= NULL
;
1536 string
= getInvariantString(state
, NULL
, NULL
, status
);
1538 if (string
== NULL
|| U_FAILURE(*status
))
1543 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1545 if (U_FAILURE(*status
))
1552 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1555 if (uprv_strlen(string
) <= 0)
1557 warning(startline
, "Encountered empty integer. Default value is 0.");
1560 /* Allow integer support for hexdecimal, octal digit and decimal*/
1561 /* and handle illegal char in the integer*/
1562 value
= uprv_strtoul(string
, &stopstring
, 0);
// len is the number of characters the conversion consumed.
1563 len
=(uint32_t)(stopstring
-string
);
// Accept the value only when the whole string was consumed.
1564 if(len
==uprv_strlen(string
))
1566 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1570 *status
=U_INVALID_CHAR_FOUND
;
/*
 * parseImport: read the file named by an import resource and store its raw
 * bytes as a binary resource created by bin_open().
 *
 * Behavior visible in this listing:
 *  - the file name is read with getInvariantString(); the closing brace is
 *    then required via expect(TOK_CLOSE_BRACE), and filename is freed when
 *    that fails;
 *  - when state->inputdir is NULL, fullname is a copy of filename and the
 *    file is opened by filename;
 *  - otherwise fullname is inputdir followed by filename, with
 *    U_FILE_SEP_CHAR inserted when inputdir does not already end with it,
 *    and the file is opened by fullname;
 *  - an open failure reports the file name and sets U_FILE_ACCESS_ERROR;
 *  - T_FileStream_size() bytes are allocated and read, the stream is closed
 *    and bin_open() receives the data together with fullname;
 *  - filename and fullname are freed at the end.
 *
 * tag        resource key; NULL is printed as (null) in the trace output
 * startline  line number used for the trace output
 * comment    passed unchanged to bin_open()
 * status     in/out ICU error code
 *
 * NOTE(review): in the inputdir == NULL branch the result of uprv_malloc()
 * is handed to uprv_strcpy() on the very next original line with no NULL
 * test, unlike the two other allocations of fullname.
 * NOTE(review): the value returned by T_FileStream_read() is discarded, so a
 * short read would go unnoticed.
 * NOTE(review): inputdir[inputdirLength - 1] is evaluated with an unsigned
 * length; if inputdir can be an empty string this indexes out of bounds.
 * NOTE(review): return statements, the release of data and any cleanup on
 * the error paths are not visible in this listing.
 */
1577 static struct SResource
*
1578 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1580 struct SResource
*result
;
1586 char *fullname
= NULL
;
/* line receives the source line of the file name for error reporting. */
1587 filename
= getInvariantString(state
, &line
, NULL
, status
);
1589 if (U_FAILURE(*status
))
1594 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1596 if (U_FAILURE(*status
))
1598 uprv_free(filename
);
1603 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1606 /* Open the input file for reading */
1607 if (state
->inputdir
== NULL
)
1611 * Always save file file name, even if there's
1612 * no input directory specified. MIGHT BREAK SOMETHING
1614 int32_t filenameLength
= uprv_strlen(filename
);
1616 fullname
= (char *) uprv_malloc(filenameLength
+ 1);
1617 uprv_strcpy(fullname
, filename
);
1620 file
= T_FileStream_open(filename
, "rb");
1625 int32_t count
= (int32_t)uprv_strlen(filename
);
1627 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1629 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1632 if(fullname
== NULL
)
1634 *status
= U_MEMORY_ALLOCATION_ERROR
;
1638 uprv_strcpy(fullname
, state
->inputdir
);
1640 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1641 fullname
[state
->inputdirLength
+ 1] = '\0';
1643 uprv_strcat(fullname
, filename
);
1647 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 1);
1650 if(fullname
== NULL
)
1652 *status
= U_MEMORY_ALLOCATION_ERROR
;
1656 uprv_strcpy(fullname
, state
->inputdir
);
1657 uprv_strcat(fullname
, filename
);
1660 file
= T_FileStream_open(fullname
, "rb");
1666 error(line
, "couldn't open input file %s", filename
);
1667 *status
= U_FILE_ACCESS_ERROR
;
/* Size the buffer from the stream and read the file in one call. */
1671 len
= T_FileStream_size(file
);
1672 data
= (uint8_t*)uprv_malloc(len
* sizeof(uint8_t));
1676 *status
= U_MEMORY_ALLOCATION_ERROR
;
1677 T_FileStream_close (file
);
1681 /* int32_t numRead = */ T_FileStream_read (file
, data
, len
);
1682 T_FileStream_close (file
);
1684 result
= bin_open(state
->bundle
, tag
, len
, data
, fullname
, comment
, status
);
/* Release the name buffers owned by this function. */
1687 uprv_free(filename
);
1688 uprv_free(fullname
);
/*
 * parseInclude: read the text file named by an include resource and store
 * its contents as a string resource created by string_open().
 *
 * Behavior visible in this listing:
 *  - the file name is read with getInvariantString(); the closing brace is
 *    then required via expect(TOK_CLOSE_BRACE), and filename is freed when
 *    that fails;
 *  - fullname is allocated with room for inputdir, a separator, filename
 *    and the terminating NUL (inputdirLength + count + 2);
 *  - when state->inputdir is not NULL, fullname is inputdir followed by
 *    filename, with U_FILE_SEP_CHAR inserted when inputdir does not already
 *    end with it; otherwise fullname is a copy of filename;
 *  - the file is opened with ucbuf_open(); a failure reports the file name;
 *  - the buffer returned by ucbuf_getBuffer() and its length are passed to
 *    string_open();
 *  - filename and fullname are freed at the end.
 *
 * tag        resource key; NULL is printed as (null) in the trace output
 * startline  line number used for the trace output
 * comment    passed unchanged to string_open()
 * status     in/out ICU error code
 *
 * NOTE(review): uprv_strlen(filename) runs on the original line directly
 * after getInvariantString(), before *status is tested. Other callers in
 * this file test the returned pointer for NULL, so a failed read may pass
 * NULL to uprv_strlen() here; move the length computation below the check.
 * NOTE(review): inputdir[inputdirLength - 1] is evaluated with an unsigned
 * length; if inputdir can be an empty string this indexes out of bounds.
 * NOTE(review): return statements, closing of ucbuf and cleanup on the
 * open-failure path are not visible in this listing.
 */
1693 static struct SResource
*
1694 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1696 struct SResource
*result
;
1700 UChar
*pTarget
= NULL
;
1703 char *fullname
= NULL
;
1705 const char* cp
= NULL
;
1706 const UChar
* uBuffer
= NULL
;
/* line receives the source line of the file name for error reporting. */
1708 filename
= getInvariantString(state
, &line
, NULL
, status
);
1709 count
= (int32_t)uprv_strlen(filename
);
1711 if (U_FAILURE(*status
))
1716 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1718 if (U_FAILURE(*status
))
1720 uprv_free(filename
);
1725 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
/* The extra two bytes cover an optional separator and the NUL. */
1728 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1730 if(fullname
== NULL
)
1732 *status
= U_MEMORY_ALLOCATION_ERROR
;
1733 uprv_free(filename
);
1737 if(state
->inputdir
!=NULL
){
1738 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1741 uprv_strcpy(fullname
, state
->inputdir
);
1743 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1744 fullname
[state
->inputdirLength
+ 1] = '\0';
1746 uprv_strcat(fullname
, filename
);
1750 uprv_strcpy(fullname
, state
->inputdir
);
1751 uprv_strcat(fullname
, filename
);
1754 uprv_strcpy(fullname
,filename
);
1757 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1759 if (U_FAILURE(*status
)) {
1760 error(line
, "couldn't open input file %s\n", filename
);
/* Hand the decoded text and its length to the string resource. */
1764 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1765 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
/* Release the name buffers owned by this function. */
1771 uprv_free(filename
);
1772 uprv_free(fullname
);
/*
 * UChar constants holding the resource type names that may follow the colon
 * in a resource declaration. Each U_STRING_DECL here has a matching
 * U_STRING_INIT in initParser(), which must run before the constants are
 * compared against token text. The third argument is the number of
 * characters in the literal, not counting the terminator.
 * k_type_bin and k_type_int are the short aliases checked separately in
 * parseResourceType().
 */
1781 U_STRING_DECL(k_type_string
, "string", 6);
1782 U_STRING_DECL(k_type_binary
, "binary", 6);
1783 U_STRING_DECL(k_type_bin
, "bin", 3);
1784 U_STRING_DECL(k_type_table
, "table", 5);
1785 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1786 U_STRING_DECL(k_type_int
, "int", 3);
1787 U_STRING_DECL(k_type_integer
, "integer", 7);
1788 U_STRING_DECL(k_type_array
, "array", 5);
1789 U_STRING_DECL(k_type_alias
, "alias", 5);
1790 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1791 U_STRING_DECL(k_type_import
, "import", 6);
1792 U_STRING_DECL(k_type_include
, "include", 7);
1794 /* Various non-standard processing plugins that create one or more special resources. */
1795 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1796 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1797 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1798 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/*
 * Resource type identifiers. Values of this enum are used as indices into
 * gResourceTypes, so the enumerators and the table rows must stay in the
 * same order.
 * NOTE(review): only some enumerators are visible in this listing; RT_UNKNOWN,
 * RT_RESERVED and the basic types referenced elsewhere in the file are
 * presumably declared on lines not shown here.
 */
1800 typedef enum EResourceType
1806 RT_TABLE_NO_FALLBACK
,
1813 RT_PROCESS_UCA_RULES
,
1814 RT_PROCESS_COLLATION
,
1815 RT_PROCESS_TRANSLITERATOR
,
1816 RT_PROCESS_DEPENDENCY
,
/*
 * gResourceTypes: one row per resource type, indexed by EResourceType.
 *
 *   nameChars      type name as a char string, used in error messages
 *   nameUChars     type name as a UChar string, compared against the token
 *                  text in parseResourceType()
 *   parseFunction  parser invoked by parseResource() for that type
 *
 * Rows with a NULL parseFunction cannot be parsed through parseResource():
 * it rejects table(nofallback) explicitly and reports an internal error for
 * any other NULL entry, which currently includes process(collation).
 * The first and last rows have a NULL nameUChars; the name search in
 * parseResourceType() starts after the first row and stops before the last.
 */
1821 const char *nameChars
; /* only used for debugging */
1822 const UChar
*nameUChars
;
1823 ParseResourceFunction
*parseFunction
;
1824 } gResourceTypes
[] = {
1825 {"Unknown", NULL
, NULL
},
1826 {"string", k_type_string
, parseString
},
1827 {"binary", k_type_binary
, parseBinary
},
1828 {"table", k_type_table
, parseTable
},
1829 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1830 {"integer", k_type_integer
, parseInteger
},
1831 {"array", k_type_array
, parseArray
},
1832 {"alias", k_type_alias
, parseAlias
},
1833 {"intvector", k_type_intvector
, parseIntVector
},
1834 {"import", k_type_import
, parseImport
},
1835 {"include", k_type_include
, parseInclude
},
1836 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1837 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1838 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1839 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
1840 {"reserved", NULL
, NULL
}
/*
 * initParser: one-time parser setup. Initializes every UChar type-name
 * constant declared with U_STRING_DECL and records the caller's choice in
 * gOmitCollationRules.
 *
 * omitCollationRules  value stored in the file-level gOmitCollationRules flag
 */
1843 void initParser(UBool omitCollationRules
)
1845 U_STRING_INIT(k_type_string
, "string", 6);
1846 U_STRING_INIT(k_type_binary
, "binary", 6);
1847 U_STRING_INIT(k_type_bin
, "bin", 3);
1848 U_STRING_INIT(k_type_table
, "table", 5);
1849 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1850 U_STRING_INIT(k_type_int
, "int", 3);
1851 U_STRING_INIT(k_type_integer
, "integer", 7);
1852 U_STRING_INIT(k_type_array
, "array", 5);
1853 U_STRING_INIT(k_type_alias
, "alias", 5);
1854 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1855 U_STRING_INIT(k_type_import
, "import", 6);
1856 U_STRING_INIT(k_type_include
, "include", 7);
1858 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1859 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1860 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1861 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
/* Remember the option for later use by the parser. */
1863 gOmitCollationRules
= omitCollationRules
;
/*
 * isTable: returns a true UBool when type is RT_TABLE or
 * RT_TABLE_NO_FALLBACK, and a false UBool for every other type.
 */
1866 static inline UBool
isTable(enum EResourceType type
) {
1867 return (UBool
)(type
==RT_TABLE
|| type
==RT_TABLE_NO_FALLBACK
);
/*
 * parseResourceType: read the type name that follows a colon and map it to
 * an EResourceType.
 *
 * Behavior visible in this listing:
 *  - a TOK_STRING is required via expect(); its text is compared with
 *    u_strcmp() against the nameUChars of each gResourceTypes row, starting
 *    with the row after RT_UNKNOWN and stopping before RT_RESERVED;
 *  - the alias int maps to RT_INTEGER; the alias bin is also recognized
 *    (its assignment is on a line not shown here);
 *  - when result has reached RT_RESERVED, the token text is copied into a
 *    1024-byte buffer that is force-terminated, *status is set to
 *    U_INVALID_FORMAT_ERROR and an unknown-resource-type error is reported.
 *
 * status  in/out ICU error code; reset to U_ZERO_ERROR after the token has
 *         been read successfully
 *
 * NOTE(review): the statements that leave the search loop on a match, the
 * return statements and any ustr_deinit() for the local comment are not
 * visible in this listing.
 */
1870 static enum EResourceType
1871 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1873 struct UString
*tokenValue
;
1874 struct UString comment
;
1875 enum EResourceType result
= RT_UNKNOWN
;
1877 ustr_init(&comment
);
1878 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1880 if (U_FAILURE(*status
))
1885 *status
= U_ZERO_ERROR
;
1887 /* Search for normal types */
/* The increment happens in the condition, so row 0 is never compared. */
1889 while ((result
=(EResourceType
)(result
+1)) < RT_RESERVED
) {
1890 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1894 /* Now search for the aliases */
1895 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1896 result
= RT_INTEGER
;
1898 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1901 else if (result
== RT_RESERVED
) {
1902 char tokenBuffer
[1024];
1903 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
/* Guarantee termination in case the copy filled the whole buffer. */
1904 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1905 *status
= U_INVALID_FORMAT_ERROR
;
1906 error(line
, "unknown resource type '%s'", tokenBuffer
);
/*
 * parseResource: parse one nested resource, starting at the token after its
 * name, and dispatch to the parser registered for its type.
 *
 * Behavior visible in this listing:
 *  - the first token is read with getToken(), which also sets startline;
 *  - several error paths set U_INVALID_FORMAT_ERROR, among them an
 *    unexpected end of file and a syntax error when neither an opening
 *    brace nor a colon is found;
 *  - an explicit type is read with parseResourceType() and must be followed
 *    by TOK_OPEN_BRACE;
 *  - without an explicit type the kind is guessed from the lookahead; after
 *    a string, a comma selects RT_ARRAY, an opening brace or a colon selects
 *    RT_TABLE and a closing brace selects RT_STRING;
 *  - RT_TABLE_NO_FALLBACK is rejected here with U_INVALID_FORMAT_ERROR;
 *  - the parse function is taken from gResourceTypes[resType]; a NULL entry
 *    sets U_INTERNAL_PROGRAM_ERROR.
 *
 * tag      resource key passed on to the type-specific parser; NULL is
 *          printed as (null) in the trace output
 * comment  passed on to the type-specific parser
 * status   in/out ICU error code
 *
 * NOTE(review): the switch header, most case labels, the return statements
 * and the declarations of startline and line are not visible here.
 */
1912 /* parse a non-top-level resource */
1913 static struct SResource
*
1914 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1916 enum ETokenType token
;
1917 enum EResourceType resType
= RT_UNKNOWN
;
1918 ParseResourceFunction
*parseFunction
= NULL
;
1919 struct UString
*tokenValue
;
/* startline is set here and reused for every later message. */
1924 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1927 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1930 /* name . [ ':' type ] '{' resource '}' */
1931 /* This function parses from the colon onwards. If the colon is present, parse the
1932 type then try to parse a resource of that type. If there is no explicit type,
1933 work it out using the lookahead tokens. */
1937 *status
= U_INVALID_FORMAT_ERROR
;
1938 error(startline
, "Unexpected EOF encountered");
1942 *status
= U_INVALID_FORMAT_ERROR
;
1946 resType
= parseResourceType(state
, status
);
1947 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1949 if (U_FAILURE(*status
))
1956 case TOK_OPEN_BRACE
:
1960 *status
= U_INVALID_FORMAT_ERROR
;
1961 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
1966 if (resType
== RT_UNKNOWN
)
1968 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1969 We could have any of the following:
1970 { { => array (nested)
1972 { string , => string array
1976 { string :/{ => table
1977 { string } => string
1980 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
1982 if (U_FAILURE(*status
))
1987 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
1991 else if (token
== TOK_STRING
)
1993 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
1995 if (U_FAILURE(*status
))
2002 case TOK_COMMA
: resType
= RT_ARRAY
; break;
2003 case TOK_OPEN_BRACE
: resType
= RT_TABLE
; break;
2004 case TOK_CLOSE_BRACE
: resType
= RT_STRING
; break;
2005 case TOK_COLON
: resType
= RT_TABLE
; break;
2007 *status
= U_INVALID_FORMAT_ERROR
;
2008 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
2014 *status
= U_INVALID_FORMAT_ERROR
;
2015 error(line
, "Unexpected token after '{'");
2019 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2020 } else if(resType
== RT_TABLE_NO_FALLBACK
) {
2021 *status
= U_INVALID_FORMAT_ERROR
;
2022 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
2027 /* We should now know what we need to parse next, so call the appropriate parser
2028 function and return. */
2029 parseFunction
= gResourceTypes
[resType
].parseFunction
;
2030 if (parseFunction
!= NULL
) {
2031 return parseFunction(state
, tag
, startline
, comment
, status
);
/* No parser is registered for this type in gResourceTypes. */
2034 *status
= U_INTERNAL_PROGRAM_ERROR
;
2035 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
/*
 * parse: parse a complete resource bundle source and return state.bundle.
 *
 * Behavior visible in this listing:
 *  - every lookahead slot has its value and comment strings initialized and
 *    initLookahead() is called with the input buffer;
 *  - inputDir, outputDir and makeBinaryCollation are copied into the parse
 *    state; the directory lengths are 0 when the directory is NULL;
 *  - the first token must be a string; it is passed to bundle_setlocale()
 *    and its comment is given to bundle_open();
 *  - the next token selects the bundle type: a colon is followed by
 *    parseResourceType() and, for table types, a required opening brace;
 *    an opening brace alone selects RT_TABLE; anything else sets
 *    U_PARSE_ERROR;
 *  - RT_TABLE_NO_FALLBACK sets the noFallback flag of the bundle;
 *  - the root table is parsed with realParseTable(); a non-NULL
 *    dependencyArray is then added to the root table and reset to NULL;
 *  - a token other than TOK_EOF after the bundle produces a warning, and a
 *    later visible statement sets U_INVALID_FORMAT_ERROR;
 *  - cleanupLookahead() and ustr_deinit() run before the bundle is returned.
 *
 * buf                  input buffer handed to initLookahead()
 * inputDir             directory used for include and import files; may be NULL
 * outputDir            stored in the parse state; may be NULL
 * makeBinaryCollation  stored in the parse state
 *
 * NOTE(review): dependencyArray is set to NULL right after table_add(), so
 * the later res_close(dependencyArray) on the failure path can only receive
 * a pointer that was already NULL; confirm the intended ownership.
 * NOTE(review): the return type, the status parameter, the declarations of
 * state, line and i, and the early return statements are not visible here,
 * so cleanup on the failure paths cannot be confirmed.
 */
2041 /* parse the top-level resource */
2043 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, UBool makeBinaryCollation
,
2046 struct UString
*tokenValue
;
2047 struct UString comment
;
2049 enum EResourceType bundleType
;
2050 enum ETokenType token
;
/* Prepare every slot of the lookahead buffer. */
2055 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
2057 ustr_init(&state
.lookahead
[i
].value
);
2058 ustr_init(&state
.lookahead
[i
].comment
);
2061 initLookahead(&state
, buf
, status
);
/* Copy the caller options into the parse state. */
2063 state
.inputdir
= inputDir
;
2064 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
2065 state
.outputdir
= outputDir
;
2066 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
2067 state
.makeBinaryCollation
= makeBinaryCollation
;
2069 ustr_init(&comment
);
2070 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
2072 state
.bundle
= bundle_open(&comment
, FALSE
, status
);
2074 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2080 bundle_setlocale(state
.bundle
, tokenValue
->fChars
, status
);
2082 /* The following code makes an empty bundle work whether or not a :table specifier is present */
2083 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2084 if(token
==TOK_COLON
) {
2085 *status
=U_ZERO_ERROR
;
2086 bundleType
=parseResourceType(&state
, status
);
2088 if(isTable(bundleType
))
2090 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2094 *status
=U_PARSE_ERROR
;
2095 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2101 if(token
==TOK_OPEN_BRACE
)
2103 *status
=U_ZERO_ERROR
;
2104 bundleType
=RT_TABLE
;
2108 /* neither colon nor open brace */
2109 *status
=U_PARSE_ERROR
;
2110 bundleType
=RT_UNKNOWN
;
2111 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2115 if (U_FAILURE(*status
))
2117 bundle_close(state
.bundle
, status
);
2121 if(bundleType
==RT_TABLE_NO_FALLBACK
) {
2123 * Parse a top-level table with the table(nofallback) declaration.
2124 * This is the same as a regular table, but also sets the
2125 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2127 state
.bundle
->noFallback
=TRUE
;
2129 /* top-level tables need not handle special table names like "collations" */
2130 realParseTable(&state
, state
.bundle
->fRoot
, NULL
, line
, status
);
2131 if(dependencyArray
!=NULL
){
2132 table_add(state
.bundle
->fRoot
, dependencyArray
, 0, status
);
2133 dependencyArray
= NULL
;
2135 if (U_FAILURE(*status
))
2137 bundle_close(state
.bundle
, status
);
2138 res_close(dependencyArray
);
2142 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2144 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2146 *status
= U_INVALID_FORMAT_ERROR
;
/* Release the lookahead strings and the bundle comment. */
2151 cleanupLookahead(&state
);
2152 ustr_deinit(&comment
);
2153 return state
.bundle
;