2 *******************************************************************************
4 * Copyright (C) 1998-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
/* Consulted by parseCollationElements when deciding whether the "unihan" collation table is added to the result. */
38 extern UBool gIncludeUnihanColl
;
40 /* Number of tokens to read ahead of the current stream position */
41 #define MAX_LOOKAHEAD 3
/* U+005B '[' and U+005D ']'. parseUCARules compares input characters against STARTCOMMAND/ENDCOMMAND. */
51 #define STARTCOMMAND 0x005B
52 #define ENDCOMMAND 0x005D
53 #define OPENSQBRACKET 0x005B
54 #define CLOSESQBRACKET 0x005D
/* NOTE(review): the next declaration looks like a struct member (the lookahead slots have a .comment field); its enclosing struct header is not visible here. */
60 struct UString comment
;
64 /* keep in sync with token defines in read.h */
/* Indexed by enum ETokenType values when building "expecting %s, got %s" style diagnostics. */
65 const char *tokenNames
[TOK_TOKEN_COUNT
] =
67 "string", /* A string token, such as "MonthNames" */
68 "'{'", /* An opening brace character */
69 "'}'", /* A closing brace character */
73 "<end of file>", /* End of the file has been reached successfully */
77 /* Just to store "TRUE" */
/* The UTF-16 code units for "TRUE", NUL-terminated; addCollation compares the "Override" value against it. */
78 static const UChar trueValue
[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
/* NOTE(review): the following declarations are accessed through ParseState* in the functions below, so they are presumably ParseState members; the struct header is not visible here. */
/* Circular token buffer: MAX_LOOKAHEAD lookahead tokens plus one slot for the current token. */
81 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
/* Index of the current token inside lookahead[]. */
82 uint32_t lookaheadPosition
;
/* Resource bundle that every *_open call in this file attaches new resources to. */
84 struct SRBRoot
*bundle
;
86 uint32_t inputdirLength
;
87 const char *outputdir
;
88 uint32_t outputdirLength
;
/* When TRUE, addCollation builds a "%%CollationBin" resource from a "Sequence" rule string. */
91 static UBool gMakeBinaryCollation
= TRUE
;
/* When TRUE, parseUCARules and addCollation leave the textual collation rules out of the bundle. */
92 static UBool gOmitCollationRules
= FALSE
;
/* Common signature of the per-type parse functions (parseString, parseTable, parseArray, ...). */
94 typedef struct SResource
*
95 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
/* Forward declaration: the table/array parsers below recurse into parseResource. */
97 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
99 /* The nature of the lookahead buffer:
100 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
101 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
102 When getToken is called, the current pointer is moved to the next slot and the
103 old slot is filled with the next token from the reader by calling getNextToken.
104 The token values are stored in the slot, which means that token values don't
105 survive a call to getToken, ie.
109 getToken(&value, NULL, status);
110 getToken(NULL, NULL, status); bad - value is now a different string
113 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
115 static uint32_t initTypeStrings
= 0;
118 if (!initTypeStrings
)
123 state
->lookaheadPosition
= 0;
128 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
130 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
131 if (U_FAILURE(*status
))
137 *status
= U_ZERO_ERROR
;
141 cleanupLookahead(ParseState
* state
)
144 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
146 ustr_deinit(&state
->lookahead
[i
].value
);
147 ustr_deinit(&state
->lookahead
[i
].comment
);
152 static enum ETokenType
153 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
155 enum ETokenType result
;
158 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
160 if (tokenValue
!= NULL
)
162 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
165 if (linenumber
!= NULL
)
167 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
172 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
175 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
176 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
177 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
178 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
179 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
181 /* printf("getToken, returning %s\n", tokenNames[result]); */
186 static enum ETokenType
187 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
189 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
191 if (U_FAILURE(*status
))
196 if (lookaheadCount
>= MAX_LOOKAHEAD
)
198 *status
= U_INTERNAL_PROGRAM_ERROR
;
202 if (tokenValue
!= NULL
)
204 *tokenValue
= &state
->lookahead
[i
].value
;
207 if (linenumber
!= NULL
)
209 *linenumber
= state
->lookahead
[i
].line
;
213 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
216 return state
->lookahead
[i
].type
;
220 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
224 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
226 if (linenumber
!= NULL
)
231 if (U_FAILURE(*status
))
236 if (token
!= expectedToken
)
238 *status
= U_INVALID_FORMAT_ERROR
;
239 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
243 *status
= U_ZERO_ERROR
;
247 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
249 struct UString
*tokenValue
;
253 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
255 if (U_FAILURE(*status
))
260 count
= u_strlen(tokenValue
->fChars
);
261 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
262 *status
= U_INVALID_FORMAT_ERROR
;
263 error(*line
, "invariant characters required for table keys, binary data, etc.");
267 result
= uprv_malloc(count
+1);
271 *status
= U_MEMORY_ALLOCATION_ERROR
;
275 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
279 static struct SResource
*
280 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
282 struct SResource
*result
= NULL
;
283 struct UString
*tokenValue
;
284 FileStream
*file
= NULL
;
285 char filename
[256] = { '\0' };
286 char cs
[128] = { '\0' };
289 UBool quoted
= FALSE
;
290 UCHARBUF
*ucbuf
=NULL
;
292 const char* cp
= NULL
;
293 UChar
*pTarget
= NULL
;
294 UChar
*target
= NULL
;
295 UChar
*targetLimit
= NULL
;
298 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
301 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
304 if (U_FAILURE(*status
))
308 /* make the filename including the directory */
309 if (state
->inputdir
!= NULL
)
311 uprv_strcat(filename
, state
->inputdir
);
313 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
315 uprv_strcat(filename
, U_FILE_SEP_STRING
);
319 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
321 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
323 if (U_FAILURE(*status
))
327 uprv_strcat(filename
, cs
);
329 if(gOmitCollationRules
) {
333 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
335 if (U_FAILURE(*status
)) {
336 error(line
, "An error occured while opening the input file %s\n", filename
);
340 /* We allocate more space than actually required
341 * since the actual size needed for storing UChars
342 * is not known in UTF-8 byte stream
344 size
= ucbuf_size(ucbuf
) + 1;
345 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
346 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
348 targetLimit
= pTarget
+size
;
350 /* read the rules into the buffer */
351 while (target
< targetLimit
)
353 c
= ucbuf_getc(ucbuf
, status
);
355 quoted
= (UBool
)!quoted
;
357 /* weiv (06/26/2002): adding the following:
358 * - preserving spaces in commands [...]
359 * - # comments until the end of line
361 if (c
== STARTCOMMAND
&& !quoted
)
364 * closing bracket will be handled by the
365 * append at the end of the loop
367 while(c
!= ENDCOMMAND
) {
368 U_APPEND_CHAR32(c
, target
,len
);
369 c
= ucbuf_getc(ucbuf
, status
);
372 else if (c
== HASH
&& !quoted
) {
374 while(c
!= CR
&& c
!= LF
) {
375 c
= ucbuf_getc(ucbuf
, status
);
379 else if (c
== ESCAPE
)
381 c
= unescape(ucbuf
, status
);
386 T_FileStream_close(file
);
390 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
392 /* ignore spaces carriage returns
393 * and line feed unless in the form \uXXXX
398 /* Append UChar * after dissembling if c > 0xffff*/
401 U_APPEND_CHAR32(c
, target
,len
);
409 /* terminate the string */
410 if(target
< targetLimit
){
414 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
419 T_FileStream_close(file
);
424 static struct SResource
*
425 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
427 struct SResource
*result
= NULL
;
428 struct UString
*tokenValue
;
429 FileStream
*file
= NULL
;
430 char filename
[256] = { '\0' };
431 char cs
[128] = { '\0' };
433 UCHARBUF
*ucbuf
=NULL
;
434 const char* cp
= NULL
;
435 UChar
*pTarget
= NULL
;
436 const UChar
*pSource
= NULL
;
439 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
442 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
445 if (U_FAILURE(*status
))
449 /* make the filename including the directory */
450 if (state
->inputdir
!= NULL
)
452 uprv_strcat(filename
, state
->inputdir
);
454 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
456 uprv_strcat(filename
, U_FILE_SEP_STRING
);
460 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
462 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
464 if (U_FAILURE(*status
))
468 uprv_strcat(filename
, cs
);
471 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
473 if (U_FAILURE(*status
)) {
474 error(line
, "An error occured while opening the input file %s\n", filename
);
478 /* We allocate more space than actually required
479 * since the actual size needed for storing UChars
480 * is not known in UTF-8 byte stream
482 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
483 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
484 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
486 #if !UCONFIG_NO_TRANSLITERATION
487 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
490 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
492 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
496 T_FileStream_close(file
);
500 static struct SResource
* dependencyArray
= NULL
;
502 static struct SResource
*
503 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
505 struct SResource
*result
= NULL
;
506 struct SResource
*elem
= NULL
;
507 struct UString
*tokenValue
;
509 char filename
[256] = { '\0' };
510 char cs
[128] = { '\0' };
512 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
515 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
518 if (U_FAILURE(*status
))
522 /* make the filename including the directory */
523 if (state
->outputdir
!= NULL
)
525 uprv_strcat(filename
, state
->outputdir
);
527 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
529 uprv_strcat(filename
, U_FILE_SEP_STRING
);
533 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
535 if (U_FAILURE(*status
))
539 uprv_strcat(filename
, cs
);
540 if(!T_FileStream_file_exists(filename
)){
542 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
544 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
547 if(dependencyArray
==NULL
){
548 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
551 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
553 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
555 array_add(dependencyArray
, elem
, status
);
557 if (U_FAILURE(*status
))
561 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
564 static struct SResource
*
565 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
567 struct UString
*tokenValue
;
568 struct SResource
*result
= NULL
;
570 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
572 return parseUCARules(tag, startline, status);
575 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
577 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
579 if (U_SUCCESS(*status
))
581 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
582 doesn't survive expect either) */
584 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
585 if(U_SUCCESS(*status
) && result
) {
586 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
588 if (U_FAILURE(*status
))
599 static struct SResource
*
600 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
602 struct UString
*tokenValue
;
603 struct SResource
*result
= NULL
;
605 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
608 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
611 if (U_SUCCESS(*status
))
613 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
614 doesn't survive expect either) */
616 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
618 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
620 if (U_FAILURE(*status
))
631 const char* inputDir
;
632 const char* outputDir
;
635 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
636 struct SResource
*current
= NULL
;
637 struct SResTable
*list
;
638 if (res
== res_none()) {
642 list
= &(res
->u
.fTable
);
644 current
= list
->fFirst
;
645 while (current
!= NULL
) {
646 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
649 current
= current
->fNext
;
654 static const UChar
* importFromDataFile(void* context
, const char* locale
, const char* type
, int32_t* pLength
, UErrorCode
* status
){
655 struct SRBRoot
*data
= NULL
;
656 UCHARBUF
*ucbuf
= NULL
;
657 GenrbData
* genrbdata
= (GenrbData
*) context
;
658 int localeLength
= strlen(locale
);
659 char* filename
= (char*)uprv_malloc(localeLength
+5);
660 char *inputDirBuf
= NULL
;
661 char *openFileName
= NULL
;
663 UChar
* urules
= NULL
;
664 int32_t urulesLength
= 0;
668 struct SResource
* root
;
669 struct SResource
* collations
;
670 struct SResource
* collation
;
671 struct SResource
* sequence
;
673 memcpy(filename
, locale
, localeLength
);
674 for(i
= 0; i
< localeLength
; i
++){
675 if(filename
[i
] == '-'){
679 filename
[localeLength
] = '.';
680 filename
[localeLength
+1] = 't';
681 filename
[localeLength
+2] = 'x';
682 filename
[localeLength
+3] = 't';
683 filename
[localeLength
+4] = 0;
686 if (status
==NULL
|| U_FAILURE(*status
)) {
690 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
693 filelen
= (int32_t)uprv_strlen(filename
);
695 if(genrbdata
->inputDir
== NULL
) {
696 const char *filenameBegin
= uprv_strrchr(filename
, U_FILE_SEP_CHAR
);
697 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
698 openFileName
[0] = '\0';
699 if (filenameBegin
!= NULL
) {
701 * When a filename ../../../data/root.txt is specified,
702 * we presume that the input directory is ../../../data
703 * This is very important when the resource file includes
704 * another file, like UCARules.txt or thaidict.brk.
706 int32_t filenameSize
= (int32_t)(filenameBegin
- filename
+ 1);
707 inputDirBuf
= uprv_strncpy((char *)uprv_malloc(filenameSize
), filename
, filenameSize
);
710 if(inputDirBuf
== NULL
) {
711 *status
= U_MEMORY_ALLOCATION_ERROR
;
715 inputDirBuf
[filenameSize
- 1] = 0;
716 genrbdata
->inputDir
= inputDirBuf
;
717 dirlen
= (int32_t)uprv_strlen(genrbdata
->inputDir
);
720 dirlen
= (int32_t)uprv_strlen(genrbdata
->inputDir
);
722 if(genrbdata
->inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
723 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
726 if(openFileName
== NULL
) {
727 *status
= U_MEMORY_ALLOCATION_ERROR
;
731 openFileName
[0] = '\0';
733 * append the input dir to openFileName if the first char in
734 * filename is not file seperation char and the last char input directory is not '.'.
735 * This is to support :
736 * genrb -s. /home/icu/data
738 * The user cannot mix notations like
739 * genrb -s. /icu/data --- the absolute path specified. -s redundant
741 * genrb -s. icu/data --- start from CWD and look in icu/data dir
743 if( (filename
[0] != U_FILE_SEP_CHAR
) && (genrbdata
->inputDir
[dirlen
-1] !='.')){
744 uprv_strcpy(openFileName
, genrbdata
->inputDir
);
745 openFileName
[dirlen
] = U_FILE_SEP_CHAR
;
747 openFileName
[dirlen
+ 1] = '\0';
749 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 1);
752 if(openFileName
== NULL
) {
753 *status
= U_MEMORY_ALLOCATION_ERROR
;
757 uprv_strcpy(openFileName
, genrbdata
->inputDir
);
761 uprv_strcat(openFileName
, filename
);
762 /* printf("%s\n", openFileName); */
763 *status
= U_ZERO_ERROR
;
764 ucbuf
= ucbuf_open(openFileName
, &cp
,getShowWarning(),TRUE
, status
);
766 if(*status
== U_FILE_ACCESS_ERROR
) {
768 fprintf(stderr
, "couldn't open file %s\n", openFileName
== NULL
? filename
: openFileName
);
771 if (ucbuf
== NULL
|| U_FAILURE(*status
)) {
772 fprintf(stderr
, "An error occured processing file %s. Error: %s\n", openFileName
== NULL
? filename
: openFileName
,u_errorName(*status
));
776 /* Parse the data into an SRBRoot */
777 data
= parse(ucbuf
, genrbdata
->inputDir
, genrbdata
->outputDir
, status
);
780 collations
= resLookup(root
, "collations");
781 collation
= resLookup(collations
, type
);
782 sequence
= resLookup(collation
, "Sequence");
783 urules
= sequence
->u
.fString
.fChars
;
784 urulesLength
= sequence
->u
.fString
.fLength
;
785 *pLength
= urulesLength
;
789 if (inputDirBuf
!= NULL
) {
790 uprv_free(inputDirBuf
);
793 if (openFileName
!= NULL
) {
794 uprv_free(openFileName
);
804 static struct SResource
*
805 addCollation(ParseState
* state
, struct SResource
*result
, uint32_t startline
, UErrorCode
*status
)
807 struct SResource
*member
= NULL
;
808 struct UString
*tokenValue
;
809 struct UString comment
;
810 enum ETokenType token
;
812 UVersionInfo version
;
813 UBool override
= FALSE
;
816 /* '{' . (name resource)* '}' */
817 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
822 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
824 if (token
== TOK_CLOSE_BRACE
)
829 if (token
!= TOK_STRING
)
832 *status
= U_INVALID_FORMAT_ERROR
;
834 if (token
== TOK_EOF
)
836 error(startline
, "unterminated table");
840 error(line
, "Unexpected token %s", tokenNames
[token
]);
846 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
848 if (U_FAILURE(*status
))
854 member
= parseResource(state
, subtag
, NULL
, status
);
856 if (U_FAILURE(*status
))
862 if (uprv_strcmp(subtag
, "Version") == 0)
865 int32_t length
= member
->u
.fString
.fLength
;
867 if (length
>= (int32_t) sizeof(ver
))
869 length
= (int32_t) sizeof(ver
) - 1;
872 u_UCharsToChars(member
->u
.fString
.fChars
, ver
, length
+ 1); /* +1 for copying NULL */
873 u_versionFromString(version
, ver
);
875 table_add(result
, member
, line
, status
);
878 else if (uprv_strcmp(subtag
, "Override") == 0)
882 if (u_strncmp(member
->u
.fString
.fChars
, trueValue
, u_strlen(trueValue
)) == 0)
886 table_add(result
, member
, line
, status
);
889 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
891 /* discard duplicate %%CollationBin if any*/
893 else if (uprv_strcmp(subtag
, "Sequence") == 0)
895 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
896 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
898 if(gMakeBinaryCollation
) {
899 UErrorCode intStatus
= U_ZERO_ERROR
;
901 /* do the collation elements */
903 uint8_t *data
= NULL
;
904 UCollator
*coll
= NULL
;
905 int32_t reorderCodes
[USCRIPT_CODE_LIMIT
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
)];
906 uint32_t reorderCodeCount
;
907 int32_t reorderCodeIndex
;
908 UParseError parseError
;
910 genrbdata
.inputDir
= state
->inputdir
;
911 genrbdata
.outputDir
= state
->outputdir
;
913 coll
= ucol_openRulesForImport(member
->u
.fString
.fChars
, member
->u
.fString
.fLength
,
914 UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,&parseError
, importFromDataFile
, &genrbdata
, &intStatus
);
916 if (U_SUCCESS(intStatus
) && coll
!= NULL
)
918 len
= ucol_cloneBinary(coll
, NULL
, 0, &intStatus
);
919 data
= (uint8_t *)uprv_malloc(len
);
920 intStatus
= U_ZERO_ERROR
;
921 len
= ucol_cloneBinary(coll
, data
, len
, &intStatus
);
922 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
924 /* tailoring rules version */
926 /*coll->dataInfo.dataVersion[1] = version[0];*/
927 /* Copy tailoring version. Builder version already */
928 /* set in ucol_openRules */
929 ((UCATableHeader
*)data
)->version
[1] = version
[0];
930 ((UCATableHeader
*)data
)->version
[2] = version
[1];
931 ((UCATableHeader
*)data
)->version
[3] = version
[2];
933 if (U_SUCCESS(intStatus
) && data
!= NULL
)
935 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", len
, data
, NULL
, NULL
, status
);
936 table_add(result
, collationBin
, line
, status
);
939 reorderCodeCount
= ucol_getReorderCodes(
940 coll
, reorderCodes
, USCRIPT_CODE_LIMIT
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
), &intStatus
);
941 if (U_SUCCESS(intStatus
) && reorderCodeCount
> 0) {
942 struct SResource
*reorderCodeRes
= intvector_open(state
->bundle
, "%%ReorderCodes", NULL
, status
);
943 for (reorderCodeIndex
= 0; reorderCodeIndex
< reorderCodeCount
; reorderCodeIndex
++) {
944 intvector_add(reorderCodeRes
, reorderCodes
[reorderCodeIndex
], status
);
946 table_add(result
, reorderCodeRes
, line
, status
);
951 warning(line
, "could not obtain rules from collator");
953 *status
= U_INVALID_FORMAT_ERROR
;
962 if(intStatus
== U_FILE_ACCESS_ERROR
) {
963 error(startline
, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
967 warning(line
, "%%Collation could not be constructed from CollationElements - check context!");
975 printf("Not building Collation binary\n");
979 /* in order to achieve smaller data files, we can direct genrb */
980 /* to omit collation rules */
981 if(gOmitCollationRules
) {
982 bundle_closeString(state
->bundle
, member
);
984 table_add(result
, member
, line
, status
);
988 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
990 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
992 if (U_FAILURE(*status
))
1000 /* A compiler warning will appear if all paths don't contain a return statement. */
1001 /* *status = U_INTERNAL_PROGRAM_ERROR;
1005 static struct SResource
*
1006 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1008 struct SResource
*result
= NULL
;
1009 struct SResource
*member
= NULL
;
1010 struct SResource
*collationRes
= NULL
;
1011 struct UString
*tokenValue
;
1012 struct UString comment
;
1013 enum ETokenType token
;
1014 char subtag
[1024], typeKeyword
[1024];
1017 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1019 if (result
== NULL
|| U_FAILURE(*status
))
1024 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1027 return addCollation(state
, result
, startline
, status
);
1031 ustr_init(&comment
);
1032 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1034 if (token
== TOK_CLOSE_BRACE
)
1039 if (token
!= TOK_STRING
)
1042 *status
= U_INVALID_FORMAT_ERROR
;
1044 if (token
== TOK_EOF
)
1046 error(startline
, "unterminated table");
1050 error(line
, "Unexpected token %s", tokenNames
[token
]);
1056 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1058 if (U_FAILURE(*status
))
1064 if (uprv_strcmp(subtag
, "default") == 0)
1066 member
= parseResource(state
, subtag
, NULL
, status
);
1068 if (U_FAILURE(*status
))
1074 table_add(result
, member
, line
, status
);
1078 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1079 /* this probably needs to be refactored or recursively use the parser */
1080 /* first we assume that our collation table won't have the explicit type */
1081 /* then, we cannot handle aliases */
1082 if(token
== TOK_OPEN_BRACE
) {
1083 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1084 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1085 collationRes
= addCollation(state
, collationRes
, startline
, status
); /* need to parse the collation data regardless */
1086 if (gIncludeUnihanColl
|| uprv_strcmp(subtag
, "unihan") != 0) {
1087 table_add(result
, collationRes
, startline
, status
);
1089 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1090 /* we could have a table too */
1091 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1092 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1093 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1094 member
= parseResource(state
, subtag
, NULL
, status
);
1096 if (U_FAILURE(*status
))
1102 table_add(result
, member
, line
, status
);
1105 *status
= U_INVALID_FORMAT_ERROR
;
1110 *status
= U_INVALID_FORMAT_ERROR
;
1115 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1117 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1119 if (U_FAILURE(*status
))
1128 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1129 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1130 static struct SResource
*
1131 realParseTable(ParseState
* state
, struct SResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1133 struct SResource
*member
= NULL
;
1134 struct UString
*tokenValue
=NULL
;
1135 struct UString comment
;
1136 enum ETokenType token
;
1139 UBool readToken
= FALSE
;
1141 /* '{' . (name resource)* '}' */
1143 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1147 ustr_init(&comment
);
1148 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1150 if (token
== TOK_CLOSE_BRACE
)
1153 warning(startline
, "Encountered empty table");
1158 if (token
!= TOK_STRING
)
1160 *status
= U_INVALID_FORMAT_ERROR
;
1162 if (token
== TOK_EOF
)
1164 error(startline
, "unterminated table");
1168 error(line
, "unexpected token %s", tokenNames
[token
]);
1174 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
1175 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1177 *status
= U_INVALID_FORMAT_ERROR
;
1178 error(line
, "invariant characters required for table keys");
1182 if (U_FAILURE(*status
))
1184 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
1188 member
= parseResource(state
, subtag
, &comment
, status
);
1190 if (member
== NULL
|| U_FAILURE(*status
))
1192 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1196 table_add(table
, member
, line
, status
);
1198 if (U_FAILURE(*status
))
1200 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1204 ustr_deinit(&comment
);
1208 /* A compiler warning will appear if all paths don't contain a return statement. */
1209 /* *status = U_INTERNAL_PROGRAM_ERROR;
1213 static struct SResource
*
1214 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1216 struct SResource
*result
;
1218 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1220 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1222 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1224 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1227 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1230 result
= table_open(state
->bundle
, tag
, comment
, status
);
1232 if (result
== NULL
|| U_FAILURE(*status
))
1237 return realParseTable(state
, result
, tag
, startline
, status
);
1240 static struct SResource
*
1241 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1243 struct SResource
*result
= NULL
;
1244 struct SResource
*member
= NULL
;
1245 struct UString
*tokenValue
;
1246 struct UString memberComments
;
1247 enum ETokenType token
;
1248 UBool readToken
= FALSE
;
1250 result
= array_open(state
->bundle
, tag
, comment
, status
);
1252 if (result
== NULL
|| U_FAILURE(*status
))
1257 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1260 ustr_init(&memberComments
);
1262 /* '{' . resource [','] '}' */
1266 ustr_setlen(&memberComments
, 0, status
);
1268 /* check for end of array, but don't consume next token unless it really is the end */
1269 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1272 if (token
== TOK_CLOSE_BRACE
)
1274 getToken(state
, NULL
, NULL
, NULL
, status
);
1276 warning(startline
, "Encountered empty array");
1281 if (token
== TOK_EOF
)
1284 *status
= U_INVALID_FORMAT_ERROR
;
1285 error(startline
, "unterminated array");
1289 /* string arrays are a special case */
1290 if (token
== TOK_STRING
)
1292 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
1293 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1297 member
= parseResource(state
, NULL
, &memberComments
, status
);
1300 if (member
== NULL
|| U_FAILURE(*status
))
1306 array_add(result
, member
, status
);
1308 if (U_FAILURE(*status
))
1314 /* eat optional comma if present */
1315 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1317 if (token
== TOK_COMMA
)
1319 getToken(state
, NULL
, NULL
, NULL
, status
);
1322 if (U_FAILURE(*status
))
1330 ustr_deinit(&memberComments
);
1334 static struct SResource
*
1335 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1337 struct SResource
*result
= NULL
;
1338 enum ETokenType token
;
1341 UBool readToken
= FALSE
;
1344 struct UString memberComments
;
1346 result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1348 if (result
== NULL
|| U_FAILURE(*status
))
1354 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1356 ustr_init(&memberComments
);
1357 /* '{' . string [','] '}' */
1360 ustr_setlen(&memberComments
, 0, status
);
1362 /* check for end of array, but don't consume next token unless it really is the end */
1363 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1365 if (token
== TOK_CLOSE_BRACE
)
1367 /* it's the end, consume the close brace */
1368 getToken(state
, NULL
, NULL
, NULL
, status
);
1370 warning(startline
, "Encountered empty int vector");
1372 ustr_deinit(&memberComments
);
1376 string
= getInvariantString(state
, NULL
, NULL
, status
);
1378 if (U_FAILURE(*status
))
1384 /* For handling illegal char in the Intvector */
1385 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1386 len
=(uint32_t)(stopstring
-string
);
1388 if(len
==uprv_strlen(string
))
1390 intvector_add(result
, value
, status
);
1392 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1397 *status
=U_INVALID_CHAR_FOUND
;
1400 if (U_FAILURE(*status
))
1406 /* the comma is optional (even though it is required to prevent the reader from concatenating
1407 consecutive entries) so that a missing comma on the last entry isn't an error */
1408 if (token
== TOK_COMMA
)
1410 getToken(state
, NULL
, NULL
, NULL
, status
);
1416 /* A compiler warning will appear if all paths don't contain a return statement. */
1417 /* intvector_close(result, status);
1418 *status = U_INTERNAL_PROGRAM_ERROR;
1422 static struct SResource
*
1423 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1425 struct SResource
*result
= NULL
;
1428 char toConv
[3] = {'\0', '\0', '\0'};
1435 string
= getInvariantString(state
, &line
, NULL
, status
);
1437 if (string
== NULL
|| U_FAILURE(*status
))
1442 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1444 if (U_FAILURE(*status
))
1451 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1454 count
= (uint32_t)uprv_strlen(string
);
1457 value
= uprv_malloc(sizeof(uint8_t) * count
);
1462 *status
= U_MEMORY_ALLOCATION_ERROR
;
1466 for (i
= 0; i
< count
; i
+= 2)
1468 toConv
[0] = string
[i
];
1469 toConv
[1] = string
[i
+ 1];
1471 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1472 len
=(uint32_t)(stopstring
-toConv
);
1474 if(len
!=uprv_strlen(toConv
))
1477 *status
=U_INVALID_CHAR_FOUND
;
1482 result
= bin_open(state
->bundle
, tag
, (i
>> 1), value
,NULL
, comment
, status
);
1488 *status
= U_INVALID_CHAR_FOUND
;
1490 error(line
, "Encountered invalid binary string");
1496 result
= bin_open(state
->bundle
, tag
, 0, NULL
, "",comment
,status
);
1497 warning(startline
, "Encountered empty binary tag");
1504 static struct SResource
*
1505 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1507 struct SResource
*result
= NULL
;
1513 string
= getInvariantString(state
, NULL
, NULL
, status
);
1515 if (string
== NULL
|| U_FAILURE(*status
))
1520 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1522 if (U_FAILURE(*status
))
1529 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1532 if (uprv_strlen(string
) <= 0)
1534 warning(startline
, "Encountered empty integer. Default value is 0.");
1537 /* Allow integer support for hexdecimal, octal digit and decimal*/
1538 /* and handle illegal char in the integer*/
1539 value
= uprv_strtoul(string
, &stopstring
, 0);
1540 len
=(uint32_t)(stopstring
-string
);
1541 if(len
==uprv_strlen(string
))
1543 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1547 *status
=U_INVALID_CHAR_FOUND
;
1554 static struct SResource
*
1555 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1557 struct SResource
*result
;
1563 char *fullname
= NULL
;
1564 int32_t numRead
= 0;
1565 filename
= getInvariantString(state
, &line
, NULL
, status
);
1567 if (U_FAILURE(*status
))
1572 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1574 if (U_FAILURE(*status
))
1576 uprv_free(filename
);
1581 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1584 /* Open the input file for reading */
1585 if (state
->inputdir
== NULL
)
1589 * Always save file file name, even if there's
1590 * no input directory specified. MIGHT BREAK SOMETHING
1592 int32_t filenameLength
= uprv_strlen(filename
);
1594 fullname
= (char *) uprv_malloc(filenameLength
+ 1);
1595 uprv_strcpy(fullname
, filename
);
1598 file
= T_FileStream_open(filename
, "rb");
1603 int32_t count
= (int32_t)uprv_strlen(filename
);
1605 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1607 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1610 if(fullname
== NULL
)
1612 *status
= U_MEMORY_ALLOCATION_ERROR
;
1616 uprv_strcpy(fullname
, state
->inputdir
);
1618 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1619 fullname
[state
->inputdirLength
+ 1] = '\0';
1621 uprv_strcat(fullname
, filename
);
1625 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 1);
1628 if(fullname
== NULL
)
1630 *status
= U_MEMORY_ALLOCATION_ERROR
;
1634 uprv_strcpy(fullname
, state
->inputdir
);
1635 uprv_strcat(fullname
, filename
);
1638 file
= T_FileStream_open(fullname
, "rb");
1644 error(line
, "couldn't open input file %s", filename
);
1645 *status
= U_FILE_ACCESS_ERROR
;
1649 len
= T_FileStream_size(file
);
1650 data
= (uint8_t*)uprv_malloc(len
* sizeof(uint8_t));
1654 *status
= U_MEMORY_ALLOCATION_ERROR
;
1655 T_FileStream_close (file
);
1659 numRead
= T_FileStream_read (file
, data
, len
);
1660 T_FileStream_close (file
);
1662 result
= bin_open(state
->bundle
, tag
, len
, data
, fullname
, comment
, status
);
1665 uprv_free(filename
);
1666 uprv_free(fullname
);
1671 static struct SResource
*
1672 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1674 struct SResource
*result
;
1678 UChar
*pTarget
= NULL
;
1681 char *fullname
= NULL
;
1683 const char* cp
= NULL
;
1684 const UChar
* uBuffer
= NULL
;
1686 filename
= getInvariantString(state
, &line
, NULL
, status
);
1687 count
= (int32_t)uprv_strlen(filename
);
1689 if (U_FAILURE(*status
))
1694 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1696 if (U_FAILURE(*status
))
1698 uprv_free(filename
);
1703 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1706 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1708 if(fullname
== NULL
)
1710 *status
= U_MEMORY_ALLOCATION_ERROR
;
1711 uprv_free(filename
);
1715 if(state
->inputdir
!=NULL
){
1716 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1719 uprv_strcpy(fullname
, state
->inputdir
);
1721 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1722 fullname
[state
->inputdirLength
+ 1] = '\0';
1724 uprv_strcat(fullname
, filename
);
1728 uprv_strcpy(fullname
, state
->inputdir
);
1729 uprv_strcat(fullname
, filename
);
1732 uprv_strcpy(fullname
,filename
);
1735 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1737 if (U_FAILURE(*status
)) {
1738 error(line
, "couldn't open input file %s\n", filename
);
1742 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1743 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
1747 uprv_free(filename
);
1748 uprv_free(fullname
);
/*
 * UChar constants for the resource type keywords that may follow ':' in a
 * resource definition. Each U_STRING_DECL here has a matching U_STRING_INIT
 * in initParser(); the last argument is the length of the literal.
 * k_type_bin and k_type_int are the short aliases checked separately in
 * parseResourceType().
 */
1757 U_STRING_DECL(k_type_string
, "string", 6);
1758 U_STRING_DECL(k_type_binary
, "binary", 6);
1759 U_STRING_DECL(k_type_bin
, "bin", 3);
1760 U_STRING_DECL(k_type_table
, "table", 5);
1761 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1762 U_STRING_DECL(k_type_int
, "int", 3);
1763 U_STRING_DECL(k_type_integer
, "integer", 7);
1764 U_STRING_DECL(k_type_array
, "array", 5);
1765 U_STRING_DECL(k_type_alias
, "alias", 5);
1766 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1767 U_STRING_DECL(k_type_import
, "import", 6);
1768 U_STRING_DECL(k_type_include
, "include", 7);
1769 U_STRING_DECL(k_type_reserved
, "reserved", 8);
1771 /* Various non-standard processing plugins that create one or more special resources. */
1772 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1773 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1774 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1775 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/*
 * Resource type codes. The values are used as indexes into gResourceTypes
 * (gResourceTypes[resType].parseFunction, gResourceTypes[result].nameUChars),
 * so the enumerator order has to stay in step with the rows of that table.
 */
1777 typedef enum EResourceType
/* table type that parseResource() rejects with "not valid except on top bundle level" */
1783 RT_TABLE_NO_FALLBACK
,
/* the process(...) plugin types */
1790 RT_PROCESS_UCA_RULES
,
1791 RT_PROCESS_COLLATION
,
1792 RT_PROCESS_TRANSLITERATOR
,
1793 RT_PROCESS_DEPENDENCY
,
/*
 * One row per resource type, indexed by the resource type code:
 *   nameChars     - char name, used in error messages
 *   nameUChars    - keyword that parseResourceType() compares against the type token
 *   parseFunction - parser that parseResource() calls for the type; when it is
 *                   NULL, parseResource() reports an internal error instead
 */
1798 const char *nameChars
; /* only used for debugging */
1799 const UChar
*nameUChars
;
1800 ParseResourceFunction
*parseFunction
;
1801 } gResourceTypes
[] = {
1802 {"Unknown", NULL
, NULL
},
1803 {"string", k_type_string
, parseString
},
1804 {"binary", k_type_binary
, parseBinary
},
1805 {"table", k_type_table
, parseTable
},
1806 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1807 {"integer", k_type_integer
, parseInteger
},
1808 {"array", k_type_array
, parseArray
},
1809 {"alias", k_type_alias
, parseAlias
},
1810 {"intvector", k_type_intvector
, parseIntVector
},
1811 {"import", k_type_import
, parseImport
},
1812 {"include", k_type_include
, parseInclude
},
1813 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1814 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1815 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1816 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
/* last row: the type name search in parseResourceType() stops before RT_RESERVED */
1817 {"reserved", NULL
, NULL
}
1820 void initParser(UBool omitBinaryCollation
, UBool omitCollationRules
)
1822 U_STRING_INIT(k_type_string
, "string", 6);
1823 U_STRING_INIT(k_type_binary
, "binary", 6);
1824 U_STRING_INIT(k_type_bin
, "bin", 3);
1825 U_STRING_INIT(k_type_table
, "table", 5);
1826 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1827 U_STRING_INIT(k_type_int
, "int", 3);
1828 U_STRING_INIT(k_type_integer
, "integer", 7);
1829 U_STRING_INIT(k_type_array
, "array", 5);
1830 U_STRING_INIT(k_type_alias
, "alias", 5);
1831 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1832 U_STRING_INIT(k_type_import
, "import", 6);
1833 U_STRING_INIT(k_type_reserved
, "reserved", 8);
1834 U_STRING_INIT(k_type_include
, "include", 7);
1836 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1837 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1838 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1839 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
1841 gMakeBinaryCollation
= !omitBinaryCollation
;
1842 gOmitCollationRules
= omitCollationRules
;
1845 static U_INLINE UBool
isTable(enum EResourceType type
) {
1846 return (UBool
)(type
==RT_TABLE
|| type
==RT_TABLE_NO_FALLBACK
);
1849 static enum EResourceType
1850 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1852 struct UString
*tokenValue
;
1853 struct UString comment
;
1854 enum EResourceType result
= RT_UNKNOWN
;
1856 ustr_init(&comment
);
1857 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1859 if (U_FAILURE(*status
))
1864 *status
= U_ZERO_ERROR
;
1866 /* Search for normal types */
1868 while (++result
< RT_RESERVED
) {
1869 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1873 /* Now search for the aliases */
1874 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1875 result
= RT_INTEGER
;
1877 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1880 else if (result
== RT_RESERVED
) {
1881 char tokenBuffer
[1024];
1882 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
1883 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1884 *status
= U_INVALID_FORMAT_ERROR
;
1885 error(line
, "unknown resource type '%s'", tokenBuffer
);
1891 /* parse a non-top-level resource */
1892 static struct SResource
*
1893 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1895 enum ETokenType token
;
1896 enum EResourceType resType
= RT_UNKNOWN
;
1897 ParseResourceFunction
*parseFunction
= NULL
;
1898 struct UString
*tokenValue
;
1902 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1905 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1908 /* name . [ ':' type ] '{' resource '}' */
1909 /* This function parses from the colon onwards. If the colon is present, parse the
1910 type then try to parse a resource of that type. If there is no explicit type,
1911 work it out using the lookahead tokens. */
1915 *status
= U_INVALID_FORMAT_ERROR
;
1916 error(startline
, "Unexpected EOF encountered");
1920 *status
= U_INVALID_FORMAT_ERROR
;
1924 resType
= parseResourceType(state
, status
);
1925 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1927 if (U_FAILURE(*status
))
1934 case TOK_OPEN_BRACE
:
1938 *status
= U_INVALID_FORMAT_ERROR
;
1939 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
1943 if (resType
== RT_UNKNOWN
)
1945 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1946 We could have any of the following:
1947 { { => array (nested)
1949 { string , => string array
1953 { string :/{ => table
1954 { string } => string
1957 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
1959 if (U_FAILURE(*status
))
1964 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
1968 else if (token
== TOK_STRING
)
1970 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
1972 if (U_FAILURE(*status
))
1979 case TOK_COMMA
: resType
= RT_ARRAY
; break;
1980 case TOK_OPEN_BRACE
: resType
= RT_TABLE
; break;
1981 case TOK_CLOSE_BRACE
: resType
= RT_STRING
; break;
1982 case TOK_COLON
: resType
= RT_TABLE
; break;
1984 *status
= U_INVALID_FORMAT_ERROR
;
1985 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
1991 *status
= U_INVALID_FORMAT_ERROR
;
1992 error(line
, "Unexpected token after '{'");
1996 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1997 } else if(resType
== RT_TABLE_NO_FALLBACK
) {
1998 *status
= U_INVALID_FORMAT_ERROR
;
1999 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
2003 /* We should now know what we need to parse next, so call the appropriate parser
2004 function and return. */
2005 parseFunction
= gResourceTypes
[resType
].parseFunction
;
2006 if (parseFunction
!= NULL
) {
2007 return parseFunction(state
, tag
, startline
, comment
, status
);
2010 *status
= U_INTERNAL_PROGRAM_ERROR
;
2011 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
2017 /* parse the top-level resource */
2019 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, UErrorCode
*status
)
2021 struct UString
*tokenValue
;
2022 struct UString comment
;
2024 enum EResourceType bundleType
;
2025 enum ETokenType token
;
2030 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
2032 ustr_init(&state
.lookahead
[i
].value
);
2033 ustr_init(&state
.lookahead
[i
].comment
);
2036 initLookahead(&state
, buf
, status
);
2038 state
.inputdir
= inputDir
;
2039 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
2040 state
.outputdir
= outputDir
;
2041 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
2043 ustr_init(&comment
);
2044 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
2046 state
.bundle
= bundle_open(&comment
, FALSE
, status
);
2048 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2054 bundle_setlocale(state
.bundle
, tokenValue
->fChars
, status
);
2056 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2057 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2058 if(token
==TOK_COLON
) {
2059 *status
=U_ZERO_ERROR
;
2060 bundleType
=parseResourceType(&state
, status
);
2062 if(isTable(bundleType
))
2064 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2068 *status
=U_PARSE_ERROR
;
2069 /* printf("asdsdweqdasdad\n"); */
2071 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2077 if(token
==TOK_OPEN_BRACE
)
2079 *status
=U_ZERO_ERROR
;
2080 bundleType
=RT_TABLE
;
2084 /* neither colon nor open brace */
2085 *status
=U_PARSE_ERROR
;
2086 bundleType
=RT_UNKNOWN
;
2087 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2091 if (U_FAILURE(*status
))
2093 bundle_close(state
.bundle
, status
);
2097 if(bundleType
==RT_TABLE_NO_FALLBACK
) {
2099 * Parse a top-level table with the table(nofallback) declaration.
2100 * This is the same as a regular table, but also sets the
2101 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2103 state
.bundle
->noFallback
=TRUE
;
2105 /* top-level tables need not handle special table names like "collations" */
2106 realParseTable(&state
, state
.bundle
->fRoot
, NULL
, line
, status
);
2108 if(dependencyArray
!=NULL
){
2109 table_add(state
.bundle
->fRoot
, dependencyArray
, 0, status
);
2110 dependencyArray
= NULL
;
2112 if (U_FAILURE(*status
))
2114 bundle_close(state
.bundle
, status
);
2115 res_close(dependencyArray
);
2119 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2121 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2123 *status
= U_INVALID_FORMAT_ERROR
;
2128 cleanupLookahead(&state
);
2129 ustr_deinit(&comment
);
2130 return state
.bundle
;