2 *******************************************************************************
4 * Copyright (C) 1998-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
38 /* Number of tokens to read ahead of the current stream position */
39 #define MAX_LOOKAHEAD 3
49 #define STARTCOMMAND 0x005B
50 #define ENDCOMMAND 0x005D
51 #define OPENSQBRACKET 0x005B
52 #define CLOSESQBRACKET 0x005D
58 struct UString comment
;
62 /* keep in sync with token defines in read.h */
63 const char *tokenNames
[TOK_TOKEN_COUNT
] =
65 "string", /* A string token, such as "MonthNames" */
66 "'{'", /* An opening brace character */
67 "'}'", /* A closing brace character */
71 "<end of file>", /* End of the file has been reached successfully */
75 /* Just to store "TRUE" */
76 static const UChar trueValue
[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
79 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
80 uint32_t lookaheadPosition
;
82 struct SRBRoot
*bundle
;
84 uint32_t inputdirLength
;
85 const char *outputdir
;
86 uint32_t outputdirLength
;
87 UBool makeBinaryCollation
;
90 static UBool gOmitCollationRules
= FALSE
;
92 typedef struct SResource
*
93 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
95 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
97 /* The nature of the lookahead buffer:
98 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
99 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
100 When getToken is called, the current pointer is moved to the next slot and the
101 old slot is filled with the next token from the reader by calling getNextToken.
102 The token values are stored in the slot, which means that token values don't
103 survive a call to getToken, i.e.
107 getToken(&value, NULL, status);
108 getToken(NULL, NULL, status); bad - value is now a different string
/*
 * Prime the lookahead buffer of a ParseState.
 *
 * Resets state->lookaheadPosition to 0 and fills slots 0 .. MAX_LOOKAHEAD-1
 * by calling getNextToken on state->buffer, checking *status after each
 * token.  *status is assigned U_ZERO_ERROR after the loop.
 * NOTE(review): this view does not show how `buf` is stored into the state,
 * what the `!initTypeStrings` branch does, or what happens on failure.
 */
111 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
113 static uint32_t initTypeStrings
= 0;
116 if (!initTypeStrings
)
121 state
->lookaheadPosition
= 0;
126 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
128 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
129 if (U_FAILURE(*status
))
135 *status
= U_ZERO_ERROR
;
139 cleanupLookahead(ParseState
* state
)
142 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
144 ustr_deinit(&state
->lookahead
[i
].value
);
145 ustr_deinit(&state
->lookahead
[i
].comment
);
/*
 * Consume the current token from the lookahead buffer.
 *
 * Reads the type of the slot at state->lookaheadPosition into `result`.
 * When tokenValue / linenumber are non-NULL they receive a pointer to that
 * slot's value string and its line number; the slot's comment is copied into
 * `comment` with ustr_cpy.  The position then advances by one (modulo
 * MAX_LOOKAHEAD + 1) and the slot at
 * (old position + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1) is cleared and
 * refilled with the next token from getNextToken.
 *
 * *tokenValue points into the buffer, so it is only valid until that slot is
 * refilled by a later call.
 * NOTE(review): the guard around the ustr_cpy call and the return statement
 * are not visible in this view.
 */
150 static enum ETokenType
151 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
153 enum ETokenType result
;
156 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
158 if (tokenValue
!= NULL
)
160 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
163 if (linenumber
!= NULL
)
165 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
170 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
173 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
174 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
175 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
176 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
177 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
179 /* printf("getToken, returning %s\n", tokenNames[result]); */
/*
 * Inspect a token without consuming it.
 *
 * lookaheadCount selects the slot
 * i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); a count
 * >= MAX_LOOKAHEAD sets *status to U_INTERNAL_PROGRAM_ERROR.  When non-NULL,
 * tokenValue receives a pointer to slot i's value and linenumber receives
 * slot i's line; the function returns slot i's type.
 *
 * NOTE(review): the comment is copied from the slot at lookaheadPosition,
 * not from slot i, so for lookaheadCount > 0 the caller receives the current
 * token's comment rather than the peeked token's - confirm this is intended.
 * NOTE(review): the guard around the ustr_cpy call and the values returned
 * on the failure paths are not visible in this view.
 */
184 static enum ETokenType
185 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
187 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
189 if (U_FAILURE(*status
))
194 if (lookaheadCount
>= MAX_LOOKAHEAD
)
196 *status
= U_INTERNAL_PROGRAM_ERROR
;
200 if (tokenValue
!= NULL
)
202 *tokenValue
= &state
->lookahead
[i
].value
;
205 if (linenumber
!= NULL
)
207 *linenumber
= state
->lookahead
[i
].line
;
211 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
214 return state
->lookahead
[i
].type
;
/*
 * Consume one token with getToken and require it to be expectedToken.
 *
 * tokenValue and comment are passed straight through to getToken; the line
 * number is read into a local and only handed back when linenumber is
 * non-NULL, so callers may pass NULL for it.  A token of any other type sets
 * *status to U_INVALID_FORMAT_ERROR and reports
 * "expecting %s, got %s" through error().  *status is assigned
 * U_ZERO_ERROR on the remaining path (presumably the matching case - the
 * branch structure is not visible in this view).
 */
218 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
222 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
224 if (linenumber
!= NULL
)
229 if (U_FAILURE(*status
))
234 if (token
!= expectedToken
)
236 *status
= U_INVALID_FORMAT_ERROR
;
237 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
241 *status
= U_ZERO_ERROR
;
245 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
247 struct UString
*tokenValue
;
251 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
253 if (U_FAILURE(*status
))
258 count
= u_strlen(tokenValue
->fChars
);
259 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
260 *status
= U_INVALID_FORMAT_ERROR
;
261 error(*line
, "invariant characters required for table keys, binary data, etc.");
265 result
= reinterpret_cast<char *>(uprv_malloc(count
+1));
269 *status
= U_MEMORY_ALLOCATION_ERROR
;
273 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
/*
 * Parse a resource whose value names a rules file and load that file's
 * contents as a string resource.
 *
 * The file name token is converted to chars in `cs`, appended to
 * state->inputdir (plus a separator when the directory does not end in
 * one) in `filename`, and opened with ucbuf_open.  A UChar buffer of
 * ucbuf_size(ucbuf) + 1 elements is allocated and zeroed, and characters
 * are read one at a time: text from STARTCOMMAND up to ENDCOMMAND is
 * appended as is, text from HASH to CR/LF is read and not appended, ESCAPE
 * sequences go through unescape(), and unquoted SPACE/TAB/CR/LF are
 * tested for separately.  The collected text becomes the resource via
 * string_open.
 *
 * NOTE(review): filename[256] and cs[128] are filled with uprv_strcat /
 * u_UCharsToChars with no visible length check.
 * NOTE(review): the uprv_malloc result is passed to uprv_memset with no
 * visible NULL check.
 * NOTE(review): the inner `while` loops for commands and # comments show no
 * test for end of file or for targetLimit.
 * NOTE(review): `file` is only ever NULL in the visible code yet is passed
 * to T_FileStream_close.
 */
277 static struct SResource
*
278 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
280 struct SResource
*result
= NULL
;
281 struct UString
*tokenValue
;
282 FileStream
*file
= NULL
;
283 char filename
[256] = { '\0' };
284 char cs
[128] = { '\0' };
287 UBool quoted
= FALSE
;
288 UCHARBUF
*ucbuf
=NULL
;
290 const char* cp
= NULL
;
291 UChar
*pTarget
= NULL
;
292 UChar
*target
= NULL
;
293 UChar
*targetLimit
= NULL
;
296 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
299 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
302 if (U_FAILURE(*status
))
306 /* make the filename including the directory */
307 if (state
->inputdir
!= NULL
)
309 uprv_strcat(filename
, state
->inputdir
);
311 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
313 uprv_strcat(filename
, U_FILE_SEP_STRING
);
317 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
319 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
321 if (U_FAILURE(*status
))
325 uprv_strcat(filename
, cs
);
327 if(gOmitCollationRules
) {
331 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
333 if (U_FAILURE(*status
)) {
334 error(line
, "An error occured while opening the input file %s\n", filename
);
338 /* We allocate more space than actually required
339 * since the actual size needed for storing UChars
340 * is not known in UTF-8 byte stream
342 size
= ucbuf_size(ucbuf
) + 1;
343 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
344 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
346 targetLimit
= pTarget
+size
;
348 /* read the rules into the buffer */
349 while (target
< targetLimit
)
351 c
= ucbuf_getc(ucbuf
, status
);
353 quoted
= (UBool
)!quoted
;
355 /* weiv (06/26/2002): adding the following:
356 * - preserving spaces in commands [...]
357 * - # comments until the end of line
359 if (c
== STARTCOMMAND
&& !quoted
)
362 * closing bracket will be handled by the
363 * append at the end of the loop
365 while(c
!= ENDCOMMAND
) {
366 U_APPEND_CHAR32(c
, target
,len
);
367 c
= ucbuf_getc(ucbuf
, status
);
370 else if (c
== HASH
&& !quoted
) {
372 while(c
!= CR
&& c
!= LF
) {
373 c
= ucbuf_getc(ucbuf
, status
);
377 else if (c
== ESCAPE
)
379 c
= unescape(ucbuf
, status
);
381 if (c
== (UChar32
)U_ERR
)
384 T_FileStream_close(file
);
388 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
390 /* ignore spaces carriage returns
391 * and line feed unless in the form \uXXXX
396 /* Append UChar * after dissembling if c > 0xffff*/
397 if (c
!= (UChar32
)U_EOF
)
399 U_APPEND_CHAR32(c
, target
,len
);
407 /* terminate the string */
408 if(target
< targetLimit
){
412 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
417 T_FileStream_close(file
);
/*
 * Parse a resource whose value names a transliteration rules file and load
 * the stripped rules as a string resource.
 *
 * The file name is built from state->inputdir and the string token in the
 * same way as in parseUCARules, then opened with ucbuf_open.  The whole
 * buffer is fetched with ucbuf_getBuffer, a UChar buffer of size + 1
 * elements is allocated, and - when UCONFIG_NO_TRANSLITERATION is off -
 * utrans_stripRules writes the stripped rules into it and returns the new
 * length.  The result is created with string_open.
 *
 * NOTE(review): filename[256] and cs[128] are filled with no visible length
 * check.
 * NOTE(review): the uprv_malloc result is passed to uprv_memset with no NULL
 * check in between.
 * NOTE(review): size + 1 elements are allocated but only `size` elements are
 * zeroed, so the extra element is not cleared by the memset.
 * NOTE(review): `file` is only ever NULL in the visible code yet is passed
 * to T_FileStream_close.
 */
422 static struct SResource
*
423 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
425 struct SResource
*result
= NULL
;
426 struct UString
*tokenValue
;
427 FileStream
*file
= NULL
;
428 char filename
[256] = { '\0' };
429 char cs
[128] = { '\0' };
431 UCHARBUF
*ucbuf
=NULL
;
432 const char* cp
= NULL
;
433 UChar
*pTarget
= NULL
;
434 const UChar
*pSource
= NULL
;
437 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
440 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
443 if (U_FAILURE(*status
))
447 /* make the filename including the directory */
448 if (state
->inputdir
!= NULL
)
450 uprv_strcat(filename
, state
->inputdir
);
452 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
454 uprv_strcat(filename
, U_FILE_SEP_STRING
);
458 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
460 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
462 if (U_FAILURE(*status
))
466 uprv_strcat(filename
, cs
);
469 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
471 if (U_FAILURE(*status
)) {
472 error(line
, "An error occured while opening the input file %s\n", filename
);
476 /* We allocate more space than actually required
477 * since the actual size needed for storing UChars
478 * is not known in UTF-8 byte stream
480 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
481 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
482 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
484 #if !UCONFIG_NO_TRANSLITERATION
485 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
488 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
490 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
494 T_FileStream_close(file
);
/*
 * File-level array resource that collects every dependency seen by
 * parseDependency; created on first use under the key "%%DEPENDENCY".
 */
498 static struct SResource
* dependencyArray
= NULL
;
/*
 * Parse a dependency resource: a string naming a file expected to exist in
 * the output directory.
 *
 * The path is built from state->outputdir (plus a separator when needed)
 * and the string token, then tested with T_FileStream_file_exists.  A
 * missing file is reported through error() or warning() (the condition that
 * chooses between them is not visible in this view).  The token text is
 * turned into two string resources: `result`, keyed by tag, and `elem`,
 * which is appended to dependencyArray.  Finally a closing brace is
 * expected.
 *
 * NOTE(review): filename[256] and cs[128] are filled with no visible length
 * check.
 */
500 static struct SResource
*
501 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
503 struct SResource
*result
= NULL
;
504 struct SResource
*elem
= NULL
;
505 struct UString
*tokenValue
;
507 char filename
[256] = { '\0' };
508 char cs
[128] = { '\0' };
510 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
513 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
516 if (U_FAILURE(*status
))
520 /* make the filename including the directory */
521 if (state
->outputdir
!= NULL
)
523 uprv_strcat(filename
, state
->outputdir
);
525 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
527 uprv_strcat(filename
, U_FILE_SEP_STRING
);
531 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
533 if (U_FAILURE(*status
))
537 uprv_strcat(filename
, cs
);
538 if(!T_FileStream_file_exists(filename
)){
540 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
542 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
545 if(dependencyArray
==NULL
){
546 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
549 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
551 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
553 array_add(dependencyArray
, elem
, status
);
555 if (U_FAILURE(*status
))
559 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
/*
 * Parse a plain string resource.
 *
 * Expects a TOK_STRING token and, on success, immediately creates the
 * resource with string_open, because the token value does not survive the
 * next getToken/expect call.  When the resource was created, a closing
 * brace is then expected.
 * NOTE(review): the handling of a failed status after the closing brace and
 * the return statement are not visible in this view.
 */
562 static struct SResource
*
563 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
565 struct UString
*tokenValue
;
566 struct SResource
*result
= NULL
;
568 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
570 return parseUCARules(tag, startline, status);
573 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
575 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
577 if (U_SUCCESS(*status
))
579 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
580 doesn't survive expect either) */
582 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
583 if(U_SUCCESS(*status
) && result
) {
584 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
586 if (U_FAILURE(*status
))
/*
 * Parse an alias resource.
 *
 * Expects a TOK_STRING token and, on success, creates the resource with
 * alias_open straight away (the token value does not survive the next
 * getToken/expect call), then expects a closing brace.
 * NOTE(review): the handling of a failed status after the closing brace and
 * the return statement are not visible in this view.
 */
597 static struct SResource
*
598 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
600 struct UString
*tokenValue
;
601 struct SResource
*result
= NULL
;
603 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
606 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
609 if (U_SUCCESS(*status
))
611 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
612 doesn't survive expect either) */
614 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
616 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
618 if (U_FAILURE(*status
))
629 const char* inputDir
;
630 const char* outputDir
;
/*
 * Find the direct child of table resource `res` whose key equals `key`.
 *
 * `res` is first compared against res_none().  Otherwise the children are
 * walked from u.fTable.fFirst along fNext, and each child's key - stored as
 * the offset fKey into list->fRoot->fKeys - is compared with uprv_strcmp.
 * NOTE(review): the return statements are not visible in this view;
 * callers in this file test the result against NULL.
 * NOTE(review): assumes `res` is a table resource - the code reads
 * res->u.fTable without checking the resource type.
 */
633 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
634 struct SResource
*current
= NULL
;
635 struct SResTable
*list
;
636 if (res
== res_none()) {
640 list
= &(res
->u
.fTable
);
642 current
= list
->fFirst
;
643 while (current
!= NULL
) {
644 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
647 current
= current
->fNext
;
/*
 * Import callback handed to ucol_openRulesForImport: loads the collation
 * rules of type `type` from the data file of another locale.
 *
 * context  a GenrbData carrying the input and output directories
 * locale   locale name; the file name is built as locale + ".txt"
 * type     key looked up under the "collations" table
 * pLength  receives the length of the rules when they are found
 * status   ICU error code
 *
 * The file name is joined with genrbdata->inputDir into openFileName, the
 * file is opened with ucbuf_open and parsed with parse(), and the rules are
 * read from collations/<type>/Sequence through resLookup.  inputDirBuf and
 * openFileName are freed at the end.
 *
 * NOTE(review): the uprv_malloc result for `filename` is written by memcpy
 * with no visible NULL check, and no uprv_free(filename) is visible.
 * NOTE(review): each character of filename is compared with '-'; the
 * replacement itself is not visible in this view.
 * NOTE(review): in the inputDir == NULL branch `dirlen` is read for the
 * allocation size before any visible assignment.
 * NOTE(review): `root` is passed to resLookup with no visible assignment,
 * and `data` from parse() is used with no visible NULL check.
 * NOTE(review): the returned rules appear to point into the parsed bundle;
 * no code releasing `data` or `ucbuf` is visible - confirm ownership.
 */
652 static const UChar
* importFromDataFile(void* context
, const char* locale
, const char* type
, int32_t* pLength
, UErrorCode
* status
){
653 struct SRBRoot
*data
= NULL
;
654 UCHARBUF
*ucbuf
= NULL
;
655 GenrbData
* genrbdata
= (GenrbData
*) context
;
656 int localeLength
= strlen(locale
);
657 char* filename
= (char*)uprv_malloc(localeLength
+5);
658 char *inputDirBuf
= NULL
;
659 char *openFileName
= NULL
;
661 UChar
* urules
= NULL
;
662 int32_t urulesLength
= 0;
666 struct SResource
* root
;
667 struct SResource
* collations
;
668 struct SResource
* collation
;
669 struct SResource
* sequence
;
671 memcpy(filename
, locale
, localeLength
);
672 for(i
= 0; i
< localeLength
; i
++){
673 if(filename
[i
] == '-'){
677 filename
[localeLength
] = '.';
678 filename
[localeLength
+1] = 't';
679 filename
[localeLength
+2] = 'x';
680 filename
[localeLength
+3] = 't';
681 filename
[localeLength
+4] = 0;
684 if (status
==NULL
|| U_FAILURE(*status
)) {
688 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
691 filelen
= (int32_t)uprv_strlen(filename
);
693 if(genrbdata
->inputDir
== NULL
) {
694 const char *filenameBegin
= uprv_strrchr(filename
, U_FILE_SEP_CHAR
);
695 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
696 openFileName
[0] = '\0';
697 if (filenameBegin
!= NULL
) {
699 * When a filename ../../../data/root.txt is specified,
700 * we presume that the input directory is ../../../data
701 * This is very important when the resource file includes
702 * another file, like UCARules.txt or thaidict.brk.
704 int32_t filenameSize
= (int32_t)(filenameBegin
- filename
+ 1);
705 inputDirBuf
= uprv_strncpy((char *)uprv_malloc(filenameSize
), filename
, filenameSize
);
708 if(inputDirBuf
== NULL
) {
709 *status
= U_MEMORY_ALLOCATION_ERROR
;
713 inputDirBuf
[filenameSize
- 1] = 0;
714 genrbdata
->inputDir
= inputDirBuf
;
715 dirlen
= (int32_t)uprv_strlen(genrbdata
->inputDir
);
718 dirlen
= (int32_t)uprv_strlen(genrbdata
->inputDir
);
720 if(genrbdata
->inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
721 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
724 if(openFileName
== NULL
) {
725 *status
= U_MEMORY_ALLOCATION_ERROR
;
729 openFileName
[0] = '\0';
731 * append the input dir to openFileName if the first char in
732 * filename is not file seperation char and the last char input directory is not '.'.
733 * This is to support :
734 * genrb -s. /home/icu/data
736 * The user cannot mix notations like
737 * genrb -s. /icu/data --- the absolute path specified. -s redundant
739 * genrb -s. icu/data --- start from CWD and look in icu/data dir
741 if( (filename
[0] != U_FILE_SEP_CHAR
) && (genrbdata
->inputDir
[dirlen
-1] !='.')){
742 uprv_strcpy(openFileName
, genrbdata
->inputDir
);
743 openFileName
[dirlen
] = U_FILE_SEP_CHAR
;
745 openFileName
[dirlen
+ 1] = '\0';
747 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 1);
750 if(openFileName
== NULL
) {
751 *status
= U_MEMORY_ALLOCATION_ERROR
;
755 uprv_strcpy(openFileName
, genrbdata
->inputDir
);
759 uprv_strcat(openFileName
, filename
);
760 /* printf("%s\n", openFileName); */
761 *status
= U_ZERO_ERROR
;
762 ucbuf
= ucbuf_open(openFileName
, &cp
,getShowWarning(),TRUE
, status
);
764 if(*status
== U_FILE_ACCESS_ERROR
) {
766 fprintf(stderr
, "couldn't open file %s\n", openFileName
== NULL
? filename
: openFileName
);
769 if (ucbuf
== NULL
|| U_FAILURE(*status
)) {
770 fprintf(stderr
, "An error occured processing file %s. Error: %s\n", openFileName
== NULL
? filename
: openFileName
,u_errorName(*status
));
774 /* Parse the data into an SRBRoot */
775 data
= parse(ucbuf
, genrbdata
->inputDir
, genrbdata
->outputDir
, FALSE
, status
);
778 collations
= resLookup(root
, "collations");
779 if (collations
!= NULL
) {
780 collation
= resLookup(collations
, type
);
781 if (collation
!= NULL
) {
782 sequence
= resLookup(collation
, "Sequence");
783 if (sequence
!= NULL
) {
784 urules
= sequence
->u
.fString
.fChars
;
785 urulesLength
= sequence
->u
.fString
.fLength
;
786 *pLength
= urulesLength
;
792 if (inputDirBuf
!= NULL
) {
793 uprv_free(inputDirBuf
);
796 if (openFileName
!= NULL
) {
797 uprv_free(openFileName
);
// Writes a printable form of the NUL-terminated UTF-16 string s into buffer.
// Code points are read with U16_NEXT; those in the range 0x20..0x7e are
// copied as a single char, and the sprintf branch writes "\u" followed by
// at least four upper-case hex digits.
// NOTE(review): buffer has no size parameter, so the caller must supply
// enough room (addCollation passes 100-byte arrays).
// NOTE(review): the loop structure and the first branch of the if/else
// chain are not visible in this view.
807 // Quick-and-dirty escaping function.
808 // Assumes that we are on an ASCII-based platform.
810 escape(const UChar
*s
, char *buffer
) {
811 int32_t length
= u_strlen(s
);
815 U16_NEXT(s
, i
, length
, c
);
819 } else if (0x20 <= c
&& c
<= 0x7e) {
821 *buffer
++ = (char)c
; // assumes ASCII-based platform
823 buffer
+= sprintf(buffer
, "\\u%04X", (int)c
);
/*
 * Parse the members of one collation table into `result`.
 *
 * Tokens are read with getToken: TOK_CLOSE_BRACE is tested for first, and
 * any other non-string token sets U_INVALID_FORMAT_ERROR ("unterminated
 * table" for TOK_EOF).  Each string token is a member key; its value is
 * parsed with parseResource and then handled by key:
 *   "Version"        - converted with u_versionFromString and kept in
 *                      `version`, then added to the table
 *   "Override"       - added to the table
 *   "%%CollationBin" - compared for explicitly; the existing comment says
 *                      duplicates are discarded
 *   "Sequence"       - when state->makeBinaryCollation is set and collation
 *                      is compiled in, a collator is built with
 *                      ucol_openRulesForImport (importFromDataFile resolves
 *                      imports), its binary is cloned with ucol_cloneBinary,
 *                      bytes 1..3 of the header version are overwritten
 *                      with version[0..2], and the binary is added as
 *                      "%%CollationBin"; reorder codes, if any, are added
 *                      as "%%ReorderCodes".  The rules string is passed to
 *                      bundle_closeString under gOmitCollationRules; a
 *                      table_add of the member follows (branch structure
 *                      not visible in this view).
 *
 * NOTE(review): `data` from uprv_malloc(len) is passed to ucol_cloneBinary
 * and written through the UCATableHeader cast before the `data != NULL`
 * test that follows - confirm the intended order.
 * NOTE(review): the `member` resource is read as a string (u.fString) for
 * "Version" and "Sequence" without a visible type check.
 */
828 static struct SResource
*
829 addCollation(ParseState
* state
, struct SResource
*result
, uint32_t startline
, UErrorCode
*status
)
831 struct SResource
*member
= NULL
;
832 struct UString
*tokenValue
;
833 struct UString comment
;
834 enum ETokenType token
;
836 UVersionInfo version
;
839 /* '{' . (name resource)* '}' */
840 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
845 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
847 if (token
== TOK_CLOSE_BRACE
)
852 if (token
!= TOK_STRING
)
855 *status
= U_INVALID_FORMAT_ERROR
;
857 if (token
== TOK_EOF
)
859 error(startline
, "unterminated table");
863 error(line
, "Unexpected token %s", tokenNames
[token
]);
869 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
871 if (U_FAILURE(*status
))
877 member
= parseResource(state
, subtag
, NULL
, status
);
879 if (U_FAILURE(*status
))
885 if (uprv_strcmp(subtag
, "Version") == 0)
888 int32_t length
= member
->u
.fString
.fLength
;
890 if (length
>= (int32_t) sizeof(ver
))
892 length
= (int32_t) sizeof(ver
) - 1;
895 u_UCharsToChars(member
->u
.fString
.fChars
, ver
, length
+ 1); /* +1 for copying NULL */
896 u_versionFromString(version
, ver
);
898 table_add(result
, member
, line
, status
);
901 else if (uprv_strcmp(subtag
, "Override") == 0)
903 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
904 table_add(result
, member
, line
, status
);
907 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
909 /* discard duplicate %%CollationBin if any*/
911 else if (uprv_strcmp(subtag
, "Sequence") == 0)
913 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
914 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
916 if(state
->makeBinaryCollation
) {
918 /* do the collation elements */
920 uint8_t *data
= NULL
;
921 UCollator
*coll
= NULL
;
922 int32_t reorderCodes
[USCRIPT_CODE_LIMIT
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
)];
923 int32_t reorderCodeCount
;
924 int32_t reorderCodeIndex
;
925 UParseError parseError
;
927 genrbdata
.inputDir
= state
->inputdir
;
928 genrbdata
.outputDir
= state
->outputdir
;
930 UErrorCode intStatus
= U_ZERO_ERROR
;
931 uprv_memset(&parseError
, 0, sizeof(parseError
));
932 coll
= ucol_openRulesForImport(member
->u
.fString
.fChars
, member
->u
.fString
.fLength
,
933 UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,&parseError
, importFromDataFile
, &genrbdata
, &intStatus
);
935 if (U_SUCCESS(intStatus
) && coll
!= NULL
)
937 len
= ucol_cloneBinary(coll
, NULL
, 0, &intStatus
);
938 data
= (uint8_t *)uprv_malloc(len
);
939 intStatus
= U_ZERO_ERROR
;
940 len
= ucol_cloneBinary(coll
, data
, len
, &intStatus
);
941 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
943 /* tailoring rules version */
945 /*coll->dataInfo.dataVersion[1] = version[0];*/
946 /* Copy tailoring version. Builder version already */
947 /* set in ucol_openRules */
948 ((UCATableHeader
*)data
)->version
[1] = version
[0];
949 ((UCATableHeader
*)data
)->version
[2] = version
[1];
950 ((UCATableHeader
*)data
)->version
[3] = version
[2];
952 if (U_SUCCESS(intStatus
) && data
!= NULL
)
954 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", len
, data
, NULL
, NULL
, status
);
955 table_add(result
, collationBin
, line
, status
);
958 reorderCodeCount
= ucol_getReorderCodes(
959 coll
, reorderCodes
, USCRIPT_CODE_LIMIT
+ (UCOL_REORDER_CODE_LIMIT
- UCOL_REORDER_CODE_FIRST
), &intStatus
);
960 if (U_SUCCESS(intStatus
) && reorderCodeCount
> 0) {
961 struct SResource
*reorderCodeRes
= intvector_open(state
->bundle
, "%%ReorderCodes", NULL
, status
);
962 for (reorderCodeIndex
= 0; reorderCodeIndex
< reorderCodeCount
; reorderCodeIndex
++) {
963 intvector_add(reorderCodeRes
, reorderCodes
[reorderCodeIndex
], status
);
965 table_add(result
, reorderCodeRes
, line
, status
);
970 warning(line
, "could not obtain rules from collator");
972 *status
= U_INVALID_FORMAT_ERROR
;
981 if(intStatus
== U_FILE_ACCESS_ERROR
) {
982 error(startline
, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
986 char preBuffer
[100], postBuffer
[100];
987 escape(parseError
.preContext
, preBuffer
);
988 escape(parseError
.postContext
, postBuffer
);
990 "%%%%CollationBin could not be constructed from CollationElements\n"
991 " check context, check that the FractionalUCA.txt UCA version "
992 "matches the current UCD version\n"
993 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
994 u_errorName(intStatus
),
1000 *status
= intStatus
;
1006 printf("Not building Collation binary\n");
1010 /* in order to achieve smaller data files, we can direct genrb */
1011 /* to omit collation rules */
1012 if(gOmitCollationRules
) {
1013 bundle_closeString(state
->bundle
, member
);
1015 table_add(result
, member
, line
, status
);
1018 if (U_FAILURE(*status
))
1025 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1026 *status
= U_INTERNAL_PROGRAM_ERROR
;
/*
 * Parse a collation table ("CollationElements" or "collations").
 *
 * A table resource is opened for `tag`.  One visible path hands it straight
 * to addCollation (presumably the !newCollation case - the condition is not
 * visible in this view).  Otherwise each member key is read with getToken:
 *   - "default" is parsed with parseResource and added to the table;
 *   - a key followed by '{' opens a sub-table that is always parsed with
 *     addCollation, but only added to the result when gIncludeUnihanColl is
 *     set or the key is not "unihan";
 *   - a key followed by ':' is accepted only when the type keyword one
 *     token further ahead is "alias"; it is then parsed with parseResource
 *     and added;
 *   - the remaining visible paths set U_INVALID_FORMAT_ERROR.
 *
 * NOTE(review): subtag and typeKeyword are 1024-byte arrays filled by
 * u_UCharsToChars with u_strlen(...) + 1 and no visible length check.
 */
1030 static struct SResource
*
1031 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1033 struct SResource
*result
= NULL
;
1034 struct SResource
*member
= NULL
;
1035 struct SResource
*collationRes
= NULL
;
1036 struct UString
*tokenValue
;
1037 struct UString comment
;
1038 enum ETokenType token
;
1039 char subtag
[1024], typeKeyword
[1024];
1042 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1044 if (result
== NULL
|| U_FAILURE(*status
))
1049 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1052 return addCollation(state
, result
, startline
, status
);
1056 ustr_init(&comment
);
1057 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1059 if (token
== TOK_CLOSE_BRACE
)
1064 if (token
!= TOK_STRING
)
1067 *status
= U_INVALID_FORMAT_ERROR
;
1069 if (token
== TOK_EOF
)
1071 error(startline
, "unterminated table");
1075 error(line
, "Unexpected token %s", tokenNames
[token
]);
1081 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1083 if (U_FAILURE(*status
))
1089 if (uprv_strcmp(subtag
, "default") == 0)
1091 member
= parseResource(state
, subtag
, NULL
, status
);
1093 if (U_FAILURE(*status
))
1099 table_add(result
, member
, line
, status
);
1103 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1104 /* this probably needs to be refactored or recursively use the parser */
1105 /* first we assume that our collation table won't have the explicit type */
1106 /* then, we cannot handle aliases */
1107 if(token
== TOK_OPEN_BRACE
) {
1108 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1109 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1110 collationRes
= addCollation(state
, collationRes
, startline
, status
); /* need to parse the collation data regardless */
1111 if (gIncludeUnihanColl
|| uprv_strcmp(subtag
, "unihan") != 0) {
1112 table_add(result
, collationRes
, startline
, status
);
1114 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1115 /* we could have a table too */
1116 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1117 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1118 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1119 member
= parseResource(state
, subtag
, NULL
, status
);
1120 if (U_FAILURE(*status
))
1126 table_add(result
, member
, line
, status
);
1129 *status
= U_INVALID_FORMAT_ERROR
;
1134 *status
= U_INVALID_FORMAT_ERROR
;
1139 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1141 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1143 if (U_FAILURE(*status
))
1152 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1153 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
/*
 * Parse the members of an already opened table resource.
 *
 * For each entry a key token is read with getToken.  TOK_CLOSE_BRACE is
 * tested for first (an "Encountered empty table" warning is visible on that
 * path); any other non-string token sets U_INVALID_FORMAT_ERROR
 * ("unterminated table" for TOK_EOF).  The key must consist of invariant
 * characters and is converted into `subtag`; the value is parsed with
 * parseResource and added with table_add.  A failure at any step is
 * reported through error() together with the status name.
 *
 * NOTE(review): the size of `subtag` and the use of `readToken` are not
 * visible in this view.
 */
1154 static struct SResource
*
1155 realParseTable(ParseState
* state
, struct SResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1157 struct SResource
*member
= NULL
;
1158 struct UString
*tokenValue
=NULL
;
1159 struct UString comment
;
1160 enum ETokenType token
;
1163 UBool readToken
= FALSE
;
1165 /* '{' . (name resource)* '}' */
1168 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1172 ustr_init(&comment
);
1173 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1175 if (token
== TOK_CLOSE_BRACE
)
1178 warning(startline
, "Encountered empty table");
1183 if (token
!= TOK_STRING
)
1185 *status
= U_INVALID_FORMAT_ERROR
;
1187 if (token
== TOK_EOF
)
1189 error(startline
, "unterminated table");
1193 error(line
, "unexpected token %s", tokenNames
[token
]);
1199 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
1200 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1202 *status
= U_INVALID_FORMAT_ERROR
;
1203 error(line
, "invariant characters required for table keys");
1207 if (U_FAILURE(*status
))
1209 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
1213 member
= parseResource(state
, subtag
, &comment
, status
);
1215 if (member
== NULL
|| U_FAILURE(*status
))
1217 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1221 table_add(table
, member
, line
, status
);
1223 if (U_FAILURE(*status
))
1225 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1229 ustr_deinit(&comment
);
1233 /* A compiler warning will appear if all paths don't contain a return statement. */
1234 /* *status = U_INTERNAL_PROGRAM_ERROR;
/*
 * Parse a table resource.
 *
 * Tables tagged "CollationElements" or "collations" are delegated to
 * parseCollationElements (with newCollation FALSE and TRUE respectively).
 * Any other table is opened with table_open and, when that succeeds, its
 * members are parsed by realParseTable.
 */
1238 static struct SResource
*
1239 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1241 struct SResource
*result
;
1243 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1245 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1247 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1249 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1252 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1255 result
= table_open(state
->bundle
, tag
, comment
, status
);
1257 if (result
== NULL
|| U_FAILURE(*status
))
1261 return realParseTable(state
, result
, tag
, startline
, status
);
/*
 * Parse an array resource: '{' resource [','] ... '}'.
 *
 * The array is opened with array_open.  For each element the next token is
 * peeked: TOK_CLOSE_BRACE is consumed (an "Encountered empty array" warning
 * is visible on that path), TOK_EOF sets U_INVALID_FORMAT_ERROR
 * ("unterminated array"), a TOK_STRING is consumed and turned into a string
 * resource directly, and anything else is parsed with parseResource.  The
 * member is appended with array_add, and a following TOK_COMMA, if present,
 * is consumed.  memberComments carries the per-element comment; it is reset
 * before each peek and released with ustr_deinit.
 */
1264 static struct SResource
*
1265 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1267 struct SResource
*result
= NULL
;
1268 struct SResource
*member
= NULL
;
1269 struct UString
*tokenValue
;
1270 struct UString memberComments
;
1271 enum ETokenType token
;
1272 UBool readToken
= FALSE
;
1274 result
= array_open(state
->bundle
, tag
, comment
, status
);
1276 if (result
== NULL
|| U_FAILURE(*status
))
1281 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1284 ustr_init(&memberComments
);
1286 /* '{' . resource [','] '}' */
1290 ustr_setlen(&memberComments
, 0, status
);
1292 /* check for end of array, but don't consume next token unless it really is the end */
1293 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1296 if (token
== TOK_CLOSE_BRACE
)
1298 getToken(state
, NULL
, NULL
, NULL
, status
);
1300 warning(startline
, "Encountered empty array");
1305 if (token
== TOK_EOF
)
1308 *status
= U_INVALID_FORMAT_ERROR
;
1309 error(startline
, "unterminated array");
1313 /* string arrays are a special case */
1314 if (token
== TOK_STRING
)
1316 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
1317 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1321 member
= parseResource(state
, NULL
, &memberComments
, status
);
1324 if (member
== NULL
|| U_FAILURE(*status
))
1330 array_add(result
, member
, status
);
1332 if (U_FAILURE(*status
))
1338 /* eat optional comma if present */
1339 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1341 if (token
== TOK_COMMA
)
1343 getToken(state
, NULL
, NULL
, NULL
, status
);
1346 if (U_FAILURE(*status
))
1354 ustr_deinit(&memberComments
);
/*
 * Parse an integer vector resource: '{' string [','] ... '}'.
 *
 * The vector is opened with intvector_open.  For each element the next
 * token is peeked; TOK_CLOSE_BRACE is consumed (an "Encountered empty int
 * vector" warning is visible on that path).  Otherwise the element is read
 * with getInvariantString and converted with uprv_strtoul using base 0; it
 * is added with intvector_add only when the whole string was consumed by
 * the conversion, and U_INVALID_CHAR_FOUND is set otherwise.  A following
 * TOK_COMMA, if present, is consumed.
 *
 * NOTE(review): getInvariantString is called with NULL for its line
 * argument, so it must not dereference that pointer on its error path.
 * NOTE(review): the buffer returned by getInvariantString is heap
 * allocated; no code releasing `string` is visible in this view.
 */
1358 static struct SResource
*
1359 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1361 struct SResource
*result
= NULL
;
1362 enum ETokenType token
;
1365 UBool readToken
= FALSE
;
1368 struct UString memberComments
;
1370 result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1372 if (result
== NULL
|| U_FAILURE(*status
))
1378 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1380 ustr_init(&memberComments
);
1381 /* '{' . string [','] '}' */
1384 ustr_setlen(&memberComments
, 0, status
);
1386 /* check for end of array, but don't consume next token unless it really is the end */
1387 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1389 if (token
== TOK_CLOSE_BRACE
)
1391 /* it's the end, consume the close brace */
1392 getToken(state
, NULL
, NULL
, NULL
, status
);
1394 warning(startline
, "Encountered empty int vector");
1396 ustr_deinit(&memberComments
);
1400 string
= getInvariantString(state
, NULL
, NULL
, status
);
1402 if (U_FAILURE(*status
))
1408 /* For handling illegal char in the Intvector */
1409 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1410 len
=(uint32_t)(stopstring
-string
);
1412 if(len
==uprv_strlen(string
))
1414 intvector_add(result
, value
, status
);
1416 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1421 *status
=U_INVALID_CHAR_FOUND
;
1424 if (U_FAILURE(*status
))
1430 /* the comma is optional (even though it is required to prevent the reader from concatenating
1431 consecutive entries) so that a missing comma on the last entry isn't an error */
1432 if (token
== TOK_COMMA
)
1434 getToken(state
, NULL
, NULL
, NULL
, status
);
1440 /* A compiler warning will appear if all paths don't contain a return statement. */
1441 /* intvector_close(result, status);
1442 *status = U_INTERNAL_PROGRAM_ERROR;
1446 static struct SResource
*
1447 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1449 struct SResource
*result
= NULL
;
1452 char toConv
[3] = {'\0', '\0', '\0'};
1459 string
= getInvariantString(state
, &line
, NULL
, status
);
1461 if (string
== NULL
|| U_FAILURE(*status
))
1466 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1468 if (U_FAILURE(*status
))
1475 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1478 count
= (uint32_t)uprv_strlen(string
);
1481 value
= reinterpret_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count
));
1486 *status
= U_MEMORY_ALLOCATION_ERROR
;
1490 for (i
= 0; i
< count
; i
+= 2)
1492 toConv
[0] = string
[i
];
1493 toConv
[1] = string
[i
+ 1];
1495 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1496 len
=(uint32_t)(stopstring
-toConv
);
1498 if(len
!=uprv_strlen(toConv
))
1501 *status
=U_INVALID_CHAR_FOUND
;
1506 result
= bin_open(state
->bundle
, tag
, (i
>> 1), value
,NULL
, comment
, status
);
1512 *status
= U_INVALID_CHAR_FOUND
;
1514 error(line
, "Encountered invalid binary string");
1520 result
= bin_open(state
->bundle
, tag
, 0, NULL
, "",comment
,status
);
1521 warning(startline
, "Encountered empty binary tag");
/*
 * parseInteger: parses the body of an integer resource.
 *
 * Visible flow: one invariant string is read, the closing brace is expected,
 * and the string is converted with uprv_strtoul() in base 0. When the
 * conversion consumed the whole string the resource is created with
 * int_open(); otherwise *status is set to U_INVALID_CHAR_FOUND. An empty
 * string produces the "Encountered empty integer" warning.
 *
 * state     - parser state (token lookahead and the bundle being built)
 * tag       - resource key, may be NULL (printed as "(null)")
 * startline - line number used for diagnostics
 * comment   - comment attached to the resource
 * status    - in/out ICU error code
 */
1528 static struct SResource
*
1529 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1531 struct SResource
*result
= NULL
;
1537 string
= getInvariantString(state
, NULL
, NULL
, status
);
1539 if (string
== NULL
|| U_FAILURE(*status
))
/* The integer body is a single string, so the next token must close it. */
1544 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1546 if (U_FAILURE(*status
))
1553 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1556 if (uprv_strlen(string
) <= 0)
1558 warning(startline
, "Encountered empty integer. Default value is 0.");
1561 /* Allow integer support for hexadecimal, octal and decimal digits */
1562 /* and handle illegal characters in the integer */
1563 value
= uprv_strtoul(string
, &stopstring
, 0);
1564 len
=(uint32_t)(stopstring
-string
);
/* Accept the value only if the conversion consumed the whole string. */
1565 if(len
==uprv_strlen(string
))
1567 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1571 *status
=U_INVALID_CHAR_FOUND
;
1578 static struct SResource
*
1579 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1581 struct SResource
*result
;
1587 char *fullname
= NULL
;
1588 filename
= getInvariantString(state
, &line
, NULL
, status
);
1590 if (U_FAILURE(*status
))
1595 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1597 if (U_FAILURE(*status
))
1599 uprv_free(filename
);
1604 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1607 /* Open the input file for reading */
1608 if (state
->inputdir
== NULL
)
1612 * Always save file file name, even if there's
1613 * no input directory specified. MIGHT BREAK SOMETHING
1615 int32_t filenameLength
= uprv_strlen(filename
);
1617 fullname
= (char *) uprv_malloc(filenameLength
+ 1);
1618 uprv_strcpy(fullname
, filename
);
1621 file
= T_FileStream_open(filename
, "rb");
1626 int32_t count
= (int32_t)uprv_strlen(filename
);
1628 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1630 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1633 if(fullname
== NULL
)
1635 *status
= U_MEMORY_ALLOCATION_ERROR
;
1639 uprv_strcpy(fullname
, state
->inputdir
);
1641 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1642 fullname
[state
->inputdirLength
+ 1] = '\0';
1644 uprv_strcat(fullname
, filename
);
1648 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 1);
1651 if(fullname
== NULL
)
1653 *status
= U_MEMORY_ALLOCATION_ERROR
;
1657 uprv_strcpy(fullname
, state
->inputdir
);
1658 uprv_strcat(fullname
, filename
);
1661 file
= T_FileStream_open(fullname
, "rb");
1667 error(line
, "couldn't open input file %s", filename
);
1668 *status
= U_FILE_ACCESS_ERROR
;
1672 len
= T_FileStream_size(file
);
1673 data
= (uint8_t*)uprv_malloc(len
* sizeof(uint8_t));
1677 *status
= U_MEMORY_ALLOCATION_ERROR
;
1678 T_FileStream_close (file
);
1682 /* int32_t numRead = */ T_FileStream_read (file
, data
, len
);
1683 T_FileStream_close (file
);
1685 result
= bin_open(state
->bundle
, tag
, len
, data
, fullname
, comment
, status
);
1688 uprv_free(filename
);
1689 uprv_free(fullname
);
1694 static struct SResource
*
1695 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1697 struct SResource
*result
;
1701 UChar
*pTarget
= NULL
;
1704 char *fullname
= NULL
;
1706 const char* cp
= NULL
;
1707 const UChar
* uBuffer
= NULL
;
1709 filename
= getInvariantString(state
, &line
, NULL
, status
);
1710 count
= (int32_t)uprv_strlen(filename
);
1712 if (U_FAILURE(*status
))
1717 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1719 if (U_FAILURE(*status
))
1721 uprv_free(filename
);
1726 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1729 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1731 if(fullname
== NULL
)
1733 *status
= U_MEMORY_ALLOCATION_ERROR
;
1734 uprv_free(filename
);
1738 if(state
->inputdir
!=NULL
){
1739 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1742 uprv_strcpy(fullname
, state
->inputdir
);
1744 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1745 fullname
[state
->inputdirLength
+ 1] = '\0';
1747 uprv_strcat(fullname
, filename
);
1751 uprv_strcpy(fullname
, state
->inputdir
);
1752 uprv_strcat(fullname
, filename
);
1755 uprv_strcpy(fullname
,filename
);
1758 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1760 if (U_FAILURE(*status
)) {
1761 error(line
, "couldn't open input file %s\n", filename
);
1765 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1766 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
1770 uprv_free(filename
);
1771 uprv_free(fullname
);
/*
 * UChar string constants for the resource type keywords that may follow a
 * ':' in a resource definition. Each constant is declared here with
 * U_STRING_DECL and filled in by the matching U_STRING_INIT call in
 * initParser(); the length argument must equal the length of the literal.
 * "bin" and "int" are aliases looked up separately in parseResourceType().
 */
1780 U_STRING_DECL(k_type_string
, "string", 6);
1781 U_STRING_DECL(k_type_binary
, "binary", 6);
1782 U_STRING_DECL(k_type_bin
, "bin", 3);
1783 U_STRING_DECL(k_type_table
, "table", 5);
1784 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1785 U_STRING_DECL(k_type_int
, "int", 3);
1786 U_STRING_DECL(k_type_integer
, "integer", 7);
1787 U_STRING_DECL(k_type_array
, "array", 5);
1788 U_STRING_DECL(k_type_alias
, "alias", 5);
1789 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1790 U_STRING_DECL(k_type_import
, "import", 6);
1791 U_STRING_DECL(k_type_include
, "include", 7);
1792 U_STRING_DECL(k_type_reserved
, "reserved", 8);
1794 /* Various non-standard processing plugins that create one or more special resources. */
1795 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1796 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1797 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1798 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/*
 * Resource type identifiers. The values are used directly as indices into
 * gResourceTypes[] (see parseResourceType() and parseResource()), so the
 * enumerator order must stay in step with the order of that table.
 */
1800 typedef enum EResourceType
1806 RT_TABLE_NO_FALLBACK
,
1813 RT_PROCESS_UCA_RULES
,
1814 RT_PROCESS_COLLATION
,
1815 RT_PROCESS_TRANSLITERATOR
,
1816 RT_PROCESS_DEPENDENCY
,
/*
 * Per-type lookup table, indexed by EResourceType value:
 *   nameChars     - char name of the type, used in diagnostics
 *   nameUChars    - UChar keyword compared against the token in
 *                   parseResourceType(); NULL for entries that are never
 *                   matched by that search
 *   parseFunction - parser invoked by parseResource(); when it is NULL
 *                   parseResource() reports U_INTERNAL_PROGRAM_ERROR
 */
1821 const char *nameChars
; /* only used for debugging */
1822 const UChar
*nameUChars
;
1823 ParseResourceFunction
*parseFunction
;
1824 } gResourceTypes
[] = {
1825 {"Unknown", NULL
, NULL
},
1826 {"string", k_type_string
, parseString
},
1827 {"binary", k_type_binary
, parseBinary
},
1828 {"table", k_type_table
, parseTable
},
1829 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1830 {"integer", k_type_integer
, parseInteger
},
1831 {"array", k_type_array
, parseArray
},
1832 {"alias", k_type_alias
, parseAlias
},
1833 {"intvector", k_type_intvector
, parseIntVector
},
1834 {"import", k_type_import
, parseImport
},
1835 {"include", k_type_include
, parseInclude
},
1836 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1837 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1838 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1839 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
1840 {"reserved", NULL
, NULL
}
/*
 * initParser: one-time parser setup. Initialises every k_type_* UChar
 * constant declared with U_STRING_DECL (each literal and length here must
 * match its declaration) and records the caller's choice in the file-level
 * flag gOmitCollationRules.
 *
 * omitCollationRules - value stored in gOmitCollationRules
 */
1843 void initParser(UBool omitCollationRules
)
1845 U_STRING_INIT(k_type_string
, "string", 6);
1846 U_STRING_INIT(k_type_binary
, "binary", 6);
1847 U_STRING_INIT(k_type_bin
, "bin", 3);
1848 U_STRING_INIT(k_type_table
, "table", 5);
1849 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1850 U_STRING_INIT(k_type_int
, "int", 3);
1851 U_STRING_INIT(k_type_integer
, "integer", 7);
1852 U_STRING_INIT(k_type_array
, "array", 5);
1853 U_STRING_INIT(k_type_alias
, "alias", 5);
1854 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1855 U_STRING_INIT(k_type_import
, "import", 6);
1856 U_STRING_INIT(k_type_reserved
, "reserved", 8);
1857 U_STRING_INIT(k_type_include
, "include", 7);
/* Keywords of the non-standard processing plugins. */
1859 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1860 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1861 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1862 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
1864 gOmitCollationRules
= omitCollationRules
;
1867 static inline UBool
isTable(enum EResourceType type
) {
1868 return (UBool
)(type
==RT_TABLE
|| type
==RT_TABLE_NO_FALLBACK
);
/*
 * parseResourceType: reads the type keyword that follows a ':' and maps it to
 * an EResourceType.
 *
 * Visible flow: a TOK_STRING is expected; its text is compared against
 * gResourceTypes[].nameUChars for every index after RT_UNKNOWN and before
 * RT_RESERVED. The aliases "int" (mapped to RT_INTEGER) and "bin" are checked
 * afterwards; the assignment in the "bin" branch is not visible here --
 * presumably RT_BINARY, confirm. If nothing matched (the scan ended at
 * RT_RESERVED) *status becomes U_INVALID_FORMAT_ERROR and an
 * "unknown resource type" error is reported.
 *
 * state  - parser state supplying the tokens
 * status - in/out ICU error code
 */
1871 static enum EResourceType
1872 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1874 struct UString
*tokenValue
;
1875 struct UString comment
;
1876 enum EResourceType result
= RT_UNKNOWN
;
1878 ustr_init(&comment
);
1879 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1881 if (U_FAILURE(*status
))
1886 *status
= U_ZERO_ERROR
;
1888 /* Search for normal types */
/* Pre-increment skips RT_UNKNOWN; the scan stops at the first name match. */
1890 while ((result
=(EResourceType
)(result
+1)) < RT_RESERVED
) {
1891 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1895 /* Now search for the aliases */
1896 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1897 result
= RT_INTEGER
;
1899 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1902 else if (result
== RT_RESERVED
) {
/* Convert the unknown keyword to chars for the error message; the copy is
   bounded by the buffer size and explicitly terminated. */
1903 char tokenBuffer
[1024];
1904 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
1905 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1906 *status
= U_INVALID_FORMAT_ERROR
;
1907 error(line
, "unknown resource type '%s'", tokenBuffer
);
/*
 * parseResource: parses one nested resource, starting at the token after its
 * name, and dispatches to the parser registered for its type.
 *
 * Visible flow: the first token selects between an explicit type (the type is
 * read with parseResourceType() and a '{' is then expected) and a bare '{';
 * EOF and other tokens set U_INVALID_FORMAT_ERROR. Without an explicit type
 * the type is guessed from up to two lookahead tokens. RT_TABLE_NO_FALLBACK is
 * rejected here because it is only valid on the top-level bundle. Finally
 * gResourceTypes[resType].parseFunction is called; a NULL entry is reported
 * as U_INTERNAL_PROGRAM_ERROR.
 *
 * state   - parser state (token lookahead and the bundle being built)
 * tag     - resource key, may be NULL (printed as "(null)")
 * comment - comment attached to the resource, passed on to the type parser
 * status  - in/out ICU error code
 */
1913 /* parse a non-top-level resource */
1914 static struct SResource
*
1915 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1917 enum ETokenType token
;
1918 enum EResourceType resType
= RT_UNKNOWN
;
1919 ParseResourceFunction
*parseFunction
= NULL
;
1920 struct UString
*tokenValue
;
1925 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1928 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1931 /* name . [ ':' type ] '{' resource '}' */
1932 /* This function parses from the colon onwards. If the colon is present, parse the
1933 type then try to parse a resource of that type. If there is no explicit type,
1934 work it out using the lookahead tokens. */
1938 *status
= U_INVALID_FORMAT_ERROR
;
1939 error(startline
, "Unexpected EOF encountered");
1943 *status
= U_INVALID_FORMAT_ERROR
;
/* Explicit type: read the keyword, then require the opening brace. */
1947 resType
= parseResourceType(state
, status
);
1948 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1950 if (U_FAILURE(*status
))
1957 case TOK_OPEN_BRACE
:
1961 *status
= U_INVALID_FORMAT_ERROR
;
1962 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
1967 if (resType
== RT_UNKNOWN
)
1969 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1970 We could have any of the following:
1971 { { => array (nested)
1973 { string , => string array
1977 { string :/{ => table
1978 { string } => string
1981 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
1983 if (U_FAILURE(*status
))
1988 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
1992 else if (token
== TOK_STRING
)
1994 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
1996 if (U_FAILURE(*status
))
2003 case TOK_COMMA
: resType
= RT_ARRAY
; break;
2004 case TOK_OPEN_BRACE
: resType
= RT_TABLE
; break;
2005 case TOK_CLOSE_BRACE
: resType
= RT_STRING
; break;
2006 case TOK_COLON
: resType
= RT_TABLE
; break;
2008 *status
= U_INVALID_FORMAT_ERROR
;
2009 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
2015 *status
= U_INVALID_FORMAT_ERROR
;
2016 error(line
, "Unexpected token after '{'");
2020 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2021 } else if(resType
== RT_TABLE_NO_FALLBACK
) {
2022 *status
= U_INVALID_FORMAT_ERROR
;
2023 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
2028 /* We should now know what we need to parse next, so call the appropriate parser
2029 function and return. */
2030 parseFunction
= gResourceTypes
[resType
].parseFunction
;
2031 if (parseFunction
!= NULL
) {
2032 return parseFunction(state
, tag
, startline
, comment
, status
);
/* A type without a registered parser function is an internal error. */
2035 *status
= U_INTERNAL_PROGRAM_ERROR
;
2036 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
/*
 * parse: parses a whole resource bundle source and returns the bundle.
 *
 * Visible flow:
 *  - the lookahead slots are initialised and the input/output directories
 *    (with their lengths, 0 when NULL) and makeBinaryCollation are stored in
 *    the parser state;
 *  - the first TOK_STRING names the locale: bundle_open() creates the bundle
 *    with the leading comment and bundle_setlocale() stores the name;
 *  - the bundle must continue with either ':' plus a table type and '{', or a
 *    bare '{' (treated as RT_TABLE); anything else is U_PARSE_ERROR;
 *  - table(nofallback) sets the bundle's noFallback flag;
 *  - realParseTable() parses the root table, after which a pending
 *    dependencyArray is added to the root;
 *  - text after the closing brace is reported with a warning and sets
 *    U_INVALID_FORMAT_ERROR.
 * On failure after the bundle was opened, bundle_close() is called.
 *
 * buf                 - input buffer handed to initLookahead()
 * inputDir            - directory used to resolve imported/included files, may be NULL
 * outputDir           - output directory stored in the parser state, may be NULL
 * makeBinaryCollation - stored in the parser state
 */
2042 /* parse the top-level resource */
2044 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, UBool makeBinaryCollation
,
2047 struct UString
*tokenValue
;
2048 struct UString comment
;
2050 enum EResourceType bundleType
;
2051 enum ETokenType token
;
/* Prepare every lookahead slot before the token stream is primed. */
2056 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
2058 ustr_init(&state
.lookahead
[i
].value
);
2059 ustr_init(&state
.lookahead
[i
].comment
);
2062 initLookahead(&state
, buf
, status
);
2064 state
.inputdir
= inputDir
;
2065 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
2066 state
.outputdir
= outputDir
;
2067 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
2068 state
.makeBinaryCollation
= makeBinaryCollation
;
2070 ustr_init(&comment
);
/* The first string token is the bundle (locale) name. */
2071 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
2073 state
.bundle
= bundle_open(&comment
, FALSE
, status
);
2075 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2081 bundle_setlocale(state
.bundle
, tokenValue
->fChars
, status
);
2083 /* The following code makes an empty bundle work with or without the :table specifier */
2084 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2085 if(token
==TOK_COLON
) {
2086 *status
=U_ZERO_ERROR
;
2087 bundleType
=parseResourceType(&state
, status
);
/* Only table types are accepted for the top-level resource. */
2089 if(isTable(bundleType
))
2091 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2095 *status
=U_PARSE_ERROR
;
2096 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2102 if(token
==TOK_OPEN_BRACE
)
2104 *status
=U_ZERO_ERROR
;
2105 bundleType
=RT_TABLE
;
2109 /* neither colon nor open brace */
2110 *status
=U_PARSE_ERROR
;
2111 bundleType
=RT_UNKNOWN
;
2112 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2116 if (U_FAILURE(*status
))
2118 bundle_close(state
.bundle
, status
);
2122 if(bundleType
==RT_TABLE_NO_FALLBACK
) {
2124 * Parse a top-level table with the table(nofallback) declaration.
2125 * This is the same as a regular table, but also sets the
2126 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2128 state
.bundle
->noFallback
=TRUE
;
2130 /* top-level tables need not handle special table names like "collations" */
2131 realParseTable(&state
, state
.bundle
->fRoot
, NULL
, line
, status
);
/* Attach the pending dependency array to the root table and clear the
   pointer so it is not used again. */
2132 if(dependencyArray
!=NULL
){
2133 table_add(state
.bundle
->fRoot
, dependencyArray
, 0, status
);
2134 dependencyArray
= NULL
;
2136 if (U_FAILURE(*status
))
2138 bundle_close(state
.bundle
, status
);
2139 res_close(dependencyArray
);
/* Anything left after the root table is reported and treated as an error. */
2143 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2145 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2147 *status
= U_INVALID_FORMAT_ERROR
;
2152 cleanupLookahead(&state
);
2153 ustr_deinit(&comment
);
2154 return state
.bundle
;