2 *******************************************************************************
4 * Copyright (C) 1998-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
31 #include "unicode/ustring.h"
32 #include "unicode/putil.h"
34 /* Number of tokens to read ahead of the current stream position */
35 #define MAX_LOOKAHEAD 3
37 #define U_ICU_UNIDATA "unidata"
45 #define STARTCOMMAND 0x005B
46 #define ENDCOMMAND 0x005D
48 U_STRING_DECL(k_type_string
, "string", 6);
49 U_STRING_DECL(k_type_binary
, "binary", 6);
50 U_STRING_DECL(k_type_bin
, "bin", 3);
51 U_STRING_DECL(k_type_table
, "table", 5);
52 U_STRING_DECL(k_type_int
, "int", 3);
53 U_STRING_DECL(k_type_integer
, "integer", 7);
54 U_STRING_DECL(k_type_array
, "array", 5);
55 U_STRING_DECL(k_type_alias
, "alias", 5);
56 U_STRING_DECL(k_type_intvector
, "intvector", 9);
57 U_STRING_DECL(k_type_import
, "import", 6);
58 U_STRING_DECL(k_type_include
, "include", 7);
59 U_STRING_DECL(k_type_reserved
, "reserved", 8);
76 /* only used for debugging */
77 const char *resourceNames
[] =
96 struct UString comment
;
100 /* keep in sync with token defines in read.h */
101 const char *tokenNames
[TOK_TOKEN_COUNT
] =
103 "string", /* A string token, such as "MonthNames" */
104 "'{'", /* An opening brace character */
105 "'}'", /* A closing brace character */
109 "<end of file>", /* End of the file has been reached successfully */
113 /* Just to store "TRUE" */
114 static const UChar trueValue
[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
116 static struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
117 static uint32_t lookaheadPosition
;
118 static UCHARBUF
*buffer
;
120 static struct SRBRoot
*bundle
;
121 static const char *inputdir
;
122 static uint32_t inputdirLength
;
124 static UBool gMakeBinaryCollation
= TRUE
;
126 static struct SResource
*parseResource(char *tag
, const struct UString
*comment
, UErrorCode
*status
);
128 void initParser(UBool makeBinaryCollation
)
132 U_STRING_INIT(k_type_string
, "string", 6);
133 U_STRING_INIT(k_type_binary
, "binary", 6);
134 U_STRING_INIT(k_type_bin
, "bin", 3);
135 U_STRING_INIT(k_type_table
, "table", 5);
136 U_STRING_INIT(k_type_int
, "int", 3);
137 U_STRING_INIT(k_type_integer
, "integer", 7);
138 U_STRING_INIT(k_type_array
, "array", 5);
139 U_STRING_INIT(k_type_alias
, "alias", 5);
140 U_STRING_INIT(k_type_intvector
, "intvector", 9);
141 U_STRING_INIT(k_type_import
, "import", 6);
142 U_STRING_INIT(k_type_reserved
, "reserved", 8);
143 U_STRING_INIT(k_type_include
, "include", 7);
144 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
146 ustr_init(&lookahead
[i
].value
);
148 gMakeBinaryCollation
= makeBinaryCollation
;
151 /* The nature of the lookahead buffer:
152 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
153 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
154 When getToken is called, the current pointer is moved to the next slot and the
155 old slot is filled with the next token from the reader by calling getNextToken.
156 The token values are stored in the slot, which means that token values don't
157 survive a call to getToken, ie.
161 getToken(&value, NULL, status);
162 getToken(NULL, NULL, status); bad - value is now a different string
165 initLookahead(UCHARBUF
*buf
, UErrorCode
*status
)
167 static uint32_t initTypeStrings
= 0;
170 if (!initTypeStrings
)
175 lookaheadPosition
= 0;
180 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
182 lookahead
[i
].type
= getNextToken(buffer
, &lookahead
[i
].value
, &lookahead
[i
].line
, &lookahead
[i
].comment
, status
);
183 if (U_FAILURE(*status
))
189 *status
= U_ZERO_ERROR
;
192 static enum ETokenType
193 getToken(struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
195 enum ETokenType result
;
198 result
= lookahead
[lookaheadPosition
].type
;
200 if (tokenValue
!= NULL
)
202 *tokenValue
= &lookahead
[lookaheadPosition
].value
;
205 if (linenumber
!= NULL
)
207 *linenumber
= lookahead
[lookaheadPosition
].line
;
212 ustr_cpy(comment
, &(lookahead
[lookaheadPosition
].comment
), status
);
215 i
= (lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
216 lookaheadPosition
= (lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
217 ustr_setlen(&lookahead
[i
].comment
, 0, status
);
218 ustr_setlen(&lookahead
[i
].value
, 0, status
);
219 lookahead
[i
].type
= getNextToken(buffer
, &lookahead
[i
].value
, &lookahead
[i
].line
, &lookahead
[i
].comment
, status
);
221 /* printf("getToken, returning %s\n", tokenNames[result]); */
226 static enum ETokenType
227 peekToken(uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
229 uint32_t i
= (lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
231 if (U_FAILURE(*status
))
236 if (lookaheadCount
>= MAX_LOOKAHEAD
)
238 *status
= U_INTERNAL_PROGRAM_ERROR
;
242 if (tokenValue
!= NULL
)
244 *tokenValue
= &lookahead
[i
].value
;
247 if (linenumber
!= NULL
)
249 *linenumber
= lookahead
[i
].line
;
253 ustr_cpy(comment
, &(lookahead
[lookaheadPosition
].comment
), status
);
256 return lookahead
[i
].type
;
260 expect(enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
264 enum ETokenType token
= getToken(tokenValue
, comment
, &line
, status
);
266 if (U_FAILURE(*status
))
271 if (linenumber
!= NULL
)
276 if (token
!= expectedToken
)
278 *status
= U_INVALID_FORMAT_ERROR
;
279 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
281 else /* "else" is added by Jing/GCL */
283 *status
= U_ZERO_ERROR
;
287 static char *getInvariantString(uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
289 struct UString
*tokenValue
;
293 expect(TOK_STRING
, &tokenValue
, comment
, line
, status
);
295 if (U_FAILURE(*status
))
300 count
= u_strlen(tokenValue
->fChars
);
301 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
302 *status
= U_INVALID_FORMAT_ERROR
;
303 error(*line
, "invariant characters required for table keys, binary data, etc.");
307 result
= uprv_malloc(count
+1);
311 *status
= U_MEMORY_ALLOCATION_ERROR
;
315 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
319 static enum EResourceType
320 parseResourceType(UErrorCode
*status
)
322 struct UString
*tokenValue
;
323 struct UString comment
;
324 enum EResourceType result
= RT_UNKNOWN
;
327 expect(TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
329 if (U_FAILURE(*status
))
334 *status
= U_ZERO_ERROR
;
336 if (u_strcmp(tokenValue
->fChars
, k_type_string
) == 0) {
338 } else if (u_strcmp(tokenValue
->fChars
, k_type_array
) == 0) {
340 } else if (u_strcmp(tokenValue
->fChars
, k_type_alias
) == 0) {
342 } else if (u_strcmp(tokenValue
->fChars
, k_type_table
) == 0) {
344 } else if (u_strcmp(tokenValue
->fChars
, k_type_binary
) == 0) {
346 } else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
348 } else if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
350 } else if (u_strcmp(tokenValue
->fChars
, k_type_integer
) == 0) {
352 } else if (u_strcmp(tokenValue
->fChars
, k_type_intvector
) == 0) {
353 result
= RT_INTVECTOR
;
354 } else if (u_strcmp(tokenValue
->fChars
, k_type_import
) == 0) {
356 } else if (u_strcmp(tokenValue
->fChars
, k_type_include
) == 0) {
358 } else if (u_strcmp(tokenValue
->fChars
, k_type_reserved
) == 0) {
359 result
= RT_RESERVED
;
361 char tokenBuffer
[1024];
362 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
363 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
364 *status
= U_INVALID_FORMAT_ERROR
;
365 error(line
, "unknown resource type '%s'", tokenBuffer
);
371 static struct SResource
*
372 parseUCARules(char *tag
, uint32_t startline
, UErrorCode
*status
)
374 struct SResource
*result
= NULL
;
375 struct UString
*tokenValue
;
376 struct UString comment
;
377 FileStream
*file
= NULL
;
378 char filename
[256] = { '\0' };
379 char cs
[128] = { '\0' };
382 UBool quoted
= FALSE
;
383 UCHARBUF
*ucbuf
=NULL
;
385 const char* cp
= NULL
;
386 UChar
*pTarget
= NULL
;
387 UChar
*target
= NULL
;
388 UChar
*targetLimit
= NULL
;
392 expect(TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
395 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
398 if (U_FAILURE(*status
))
402 /* make the filename including the directory */
403 if (inputdir
!= NULL
)
405 uprv_strcat(filename
, inputdir
);
407 if (inputdir
[inputdirLength
- 1] != U_FILE_SEP_CHAR
)
409 uprv_strcat(filename
, U_FILE_SEP_STRING
);
413 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
415 expect(TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
417 if (U_FAILURE(*status
))
421 uprv_strcat(filename
,"..");
422 uprv_strcat(filename
,U_FILE_SEP_STRING
);
423 uprv_strcat(filename
, U_ICU_UNIDATA
);
424 uprv_strcat(filename
, U_FILE_SEP_STRING
);
425 uprv_strcat(filename
, cs
);
428 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
430 if (U_FAILURE(*status
)) {
431 error(line
, "An error occured while opening the input file %s\n", filename
);
435 /* We allocate more space than actually required
436 * since the actual size needed for storing UChars
437 * is not known in UTF-8 byte stream
439 size
= ucbuf_size(ucbuf
);
440 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
441 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
443 targetLimit
= pTarget
+size
;
445 /* read the rules into the buffer */
446 while (target
< targetLimit
)
448 c
= ucbuf_getc(ucbuf
, status
);
450 quoted
= (UBool
)!quoted
;
452 /* weiv (06/26/2002): adding the following:
453 * - preserving spaces in commands [...]
454 * - # comments until the end of line
456 if (c
== STARTCOMMAND
&& !quoted
)
459 * closing bracket will be handled by the
460 * append at the end of the loop
462 while(c
!= ENDCOMMAND
) {
463 U_APPEND_CHAR32(c
, target
,len
);
464 c
= ucbuf_getc(ucbuf
, status
);
466 } else if (c
== HASH
&& !quoted
) {
468 while(c
!= CR
&& c
!= LF
) {
469 c
= ucbuf_getc(ucbuf
, status
);
472 } else if (c
== ESCAPE
)
474 c
= unescape(ucbuf
, status
);
479 T_FileStream_close(file
);
483 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
485 /* ignore spaces carriage returns
486 * and line feed unless in the form \uXXXX
491 /* Append UChar * after dissembling if c > 0xffff*/
494 U_APPEND_CHAR32(c
, target
,len
);
502 /* terminate the string */
503 if(target
< targetLimit
){
507 result
= string_open(bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
512 T_FileStream_close(file
);
517 static struct SResource
*
518 parseString(char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
520 struct UString
*tokenValue
;
521 struct SResource
*result
= NULL
;
523 if (tag
!= NULL
&& uprv_strcmp(tag
, "%%UCARULES") == 0)
525 return parseUCARules(tag
, startline
, status
);
528 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
530 expect(TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
532 if (U_SUCCESS(*status
))
534 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
535 doesn't survive expect either) */
537 result
= string_open(bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
538 if(U_SUCCESS(*status
) && result
) {
539 expect(TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
541 if (U_FAILURE(*status
))
543 string_close(result
, status
);
552 static struct SResource
*
553 parseAlias(char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
555 struct UString
*tokenValue
;
556 struct SResource
*result
= NULL
;
558 expect(TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
561 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
564 if (U_SUCCESS(*status
))
566 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
567 doesn't survive expect either) */
569 result
= alias_open(bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
571 expect(TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
573 if (U_FAILURE(*status
))
575 alias_close(result
, status
);
583 static struct SResource
*
584 addCollation(struct SResource
*result
, uint32_t startline
, UErrorCode
*status
)
586 struct SResource
*member
= NULL
;
587 struct UString
*tokenValue
;
588 struct UString comment
;
589 enum ETokenType token
;
591 UVersionInfo version
;
592 UBool override
= FALSE
;
594 /* '{' . (name resource)* '}' */
595 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
600 token
= getToken(&tokenValue
, &comment
, &line
, status
);
602 if (token
== TOK_CLOSE_BRACE
)
607 if (token
!= TOK_STRING
)
609 table_close(result
, status
);
610 *status
= U_INVALID_FORMAT_ERROR
;
612 if (token
== TOK_EOF
)
614 error(startline
, "unterminated table");
618 error(line
, "Unexpected token %s", tokenNames
[token
]);
624 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
626 if (U_FAILURE(*status
))
628 table_close(result
, status
);
632 member
= parseResource(subtag
, NULL
, status
);
634 if (U_FAILURE(*status
))
636 table_close(result
, status
);
640 if (uprv_strcmp(subtag
, "Version") == 0)
643 int32_t length
= member
->u
.fString
.fLength
;
645 if (length
>= (int32_t) sizeof(ver
))
647 length
= (int32_t) sizeof(ver
) - 1;
650 u_UCharsToChars(member
->u
.fString
.fChars
, ver
, length
+ 1); /* +1 for copying NULL */
651 u_versionFromString(version
, ver
);
653 table_add(result
, member
, line
, status
);
656 else if (uprv_strcmp(subtag
, "Override") == 0)
660 if (u_strncmp(member
->u
.fString
.fChars
, trueValue
, u_strlen(trueValue
)) == 0)
664 table_add(result
, member
, line
, status
);
667 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
669 /* discard duplicate %%CollationBin if any*/
671 else if (uprv_strcmp(subtag
, "Sequence") == 0)
673 #if UCONFIG_NO_COLLATION
674 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
676 /* first we add the "Sequence", so that we always have rules */
677 table_add(result
, member
, line
, status
);
678 if(gMakeBinaryCollation
) {
679 UErrorCode intStatus
= U_ZERO_ERROR
;
681 /* do the collation elements */
683 uint8_t *data
= NULL
;
684 UCollator
*coll
= NULL
;
685 UParseError parseError
;
687 /*table_add(result, member, line, status);*/
689 coll
= ucol_openRules(member
->u
.fString
.fChars
, member
->u
.fString
.fLength
,
690 UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,&parseError
, &intStatus
);
692 if (U_SUCCESS(intStatus
) && coll
!= NULL
)
694 len
= ucol_cloneBinary(coll
, NULL
, 0, &intStatus
);
695 data
= (uint8_t *)uprv_malloc(len
);
696 len
= ucol_cloneBinary(coll
, data
, len
, &intStatus
);
697 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
699 /* tailoring rules version */
701 /*coll->dataInfo.dataVersion[1] = version[0];*/
702 /* Copy tailoring version. Builder version already */
703 /* set in ucol_openRules */
704 ((UCATableHeader
*)data
)->version
[1] = version
[0];
705 ((UCATableHeader
*)data
)->version
[2] = version
[1];
706 ((UCATableHeader
*)data
)->version
[3] = version
[2];
708 if (U_SUCCESS(intStatus
) && data
!= NULL
)
710 member
= bin_open(bundle
, "%%CollationBin", len
, data
, NULL
, NULL
, status
);
711 /*table_add(bundle->fRoot, member, line, status);*/
712 table_add(result
, member
, line
, status
);
717 warning(line
, "could not obtain rules from collator");
719 *status
= U_INVALID_FORMAT_ERROR
;
728 warning(line
, "%%Collation could not be constructed from CollationElements - check context!");
736 printf("Not building Collation binary\n");
742 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
744 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
746 if (U_FAILURE(*status
))
748 table_close(result
, status
);
754 /* A compiler warning will appear if all paths don't contain a return statement. */
755 /* *status = U_INTERNAL_PROGRAM_ERROR;
759 static struct SResource
*
760 parseCollationElements(char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
762 struct SResource
*result
= NULL
;
763 struct SResource
*member
= NULL
;
764 struct SResource
*collationRes
= NULL
;
765 struct UString
*tokenValue
;
766 struct UString comment
;
767 enum ETokenType token
;
768 char subtag
[1024], typeKeyword
[1024];
771 result
= table_open(bundle
, tag
, NULL
, status
);
773 if (result
== NULL
|| U_FAILURE(*status
))
778 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
781 return addCollation(result
, startline
, status
);
785 token
= getToken(&tokenValue
, &comment
, &line
, status
);
787 if (token
== TOK_CLOSE_BRACE
)
792 if (token
!= TOK_STRING
)
794 table_close(result
, status
);
795 *status
= U_INVALID_FORMAT_ERROR
;
797 if (token
== TOK_EOF
)
799 error(startline
, "unterminated table");
803 error(line
, "Unexpected token %s", tokenNames
[token
]);
809 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
811 if (U_FAILURE(*status
))
813 table_close(result
, status
);
817 if (uprv_strcmp(subtag
, "default") == 0)
819 member
= parseResource(subtag
, NULL
, status
);
821 if (U_FAILURE(*status
))
823 table_close(result
, status
);
827 table_add(result
, member
, line
, status
);
831 token
= peekToken(0, &tokenValue
, &line
, &comment
, status
);
832 /* this probably needs to be refactored or recursively use the parser */
833 /* first we assume that our collation table won't have the explicit type */
834 /* then, we cannot handle aliases */
835 if(token
== TOK_OPEN_BRACE
) {
836 token
= getToken(&tokenValue
, &comment
, &line
, status
);
837 collationRes
= table_open(bundle
, subtag
, NULL
, status
);
838 table_add(result
, addCollation(collationRes
, startline
, status
), startline
, status
);
839 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
840 /* we could have a table too */
841 token
= peekToken(1, &tokenValue
, &line
, &comment
, status
);
842 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
843 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
844 member
= parseResource(subtag
, NULL
, status
);
846 if (U_FAILURE(*status
))
848 table_close(result
, status
);
852 table_add(result
, member
, line
, status
);
854 *status
= U_INVALID_FORMAT_ERROR
;
858 *status
= U_INVALID_FORMAT_ERROR
;
863 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
865 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
867 if (U_FAILURE(*status
))
869 table_close(result
, status
);
877 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
878 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
879 static struct SResource
*
880 realParseTable(struct SResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
882 struct SResource
*member
= NULL
;
883 struct UString
*tokenValue
=NULL
;
884 struct UString comment
;
885 enum ETokenType token
;
888 UBool readToken
= FALSE
;
890 /* '{' . (name resource)* '}' */
892 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
897 token
= getToken(&tokenValue
, &comment
, &line
, status
);
899 if (token
== TOK_CLOSE_BRACE
)
902 warning(startline
, "Encountered empty table");
907 if (token
!= TOK_STRING
)
909 table_close(table
, status
);
910 *status
= U_INVALID_FORMAT_ERROR
;
912 if (token
== TOK_EOF
)
914 error(startline
, "unterminated table");
918 error(line
, "unexpected token %s", tokenNames
[token
]);
924 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
925 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
927 *status
= U_INVALID_FORMAT_ERROR
;
928 error(line
, "invariant characters required for table keys");
929 table_close(table
, status
);
933 if (U_FAILURE(*status
))
935 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
936 table_close(table
, status
);
940 member
= parseResource(subtag
, &comment
, status
);
942 if (member
== NULL
|| U_FAILURE(*status
))
944 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
945 table_close(table
, status
);
949 table_add(table
, member
, line
, status
);
951 if (U_FAILURE(*status
))
953 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
954 table_close(table
, status
);
961 /* A compiler warning will appear if all paths don't contain a return statement. */
962 /* *status = U_INTERNAL_PROGRAM_ERROR;
966 static struct SResource
*
967 parseTable(char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
969 struct SResource
*result
;
971 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
973 return parseCollationElements(tag
, startline
, FALSE
, status
);
975 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
977 return parseCollationElements(tag
, startline
, TRUE
, status
);
980 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
983 result
= table_open(bundle
, tag
, comment
, status
);
985 if (result
== NULL
|| U_FAILURE(*status
))
990 return realParseTable(result
, tag
, startline
, status
);
993 static struct SResource
*
994 parseArray(char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
996 struct SResource
*result
= NULL
;
997 struct SResource
*member
= NULL
;
998 struct UString
*tokenValue
;
999 struct UString memberComments
;
1000 enum ETokenType token
;
1001 UBool readToken
= FALSE
;
1003 result
= array_open(bundle
, tag
, comment
, status
);
1005 if (result
== NULL
|| U_FAILURE(*status
))
1010 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1013 ustr_init(&memberComments
);
1015 /* '{' . resource [','] '}' */
1019 ustr_setlen(&memberComments
, 0, status
);
1021 /* check for end of array, but don't consume next token unless it really is the end */
1022 token
= peekToken(0, &tokenValue
, NULL
, &memberComments
, status
);
1025 if (token
== TOK_CLOSE_BRACE
)
1027 getToken(NULL
, NULL
, NULL
, status
);
1029 warning(startline
, "Encountered empty array");
1034 if (token
== TOK_EOF
)
1036 array_close(result
, status
);
1037 *status
= U_INVALID_FORMAT_ERROR
;
1038 error(startline
, "unterminated array");
1042 /* string arrays are a special case */
1043 if (token
== TOK_STRING
)
1045 getToken(&tokenValue
, &memberComments
, NULL
, status
);
1046 member
= string_open(bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1050 member
= parseResource(NULL
, &memberComments
, status
);
1053 if (member
== NULL
|| U_FAILURE(*status
))
1055 array_close(result
, status
);
1059 array_add(result
, member
, status
);
1061 if (U_FAILURE(*status
))
1063 array_close(result
, status
);
1067 /* eat optional comma if present */
1068 token
= peekToken(0, NULL
, NULL
, NULL
, status
);
1070 if (token
== TOK_COMMA
)
1072 getToken(NULL
, NULL
, NULL
, status
);
1075 if (U_FAILURE(*status
))
1077 array_close(result
, status
);
1086 static struct SResource
*
1087 parseIntVector(char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1089 struct SResource
*result
= NULL
;
1090 enum ETokenType token
;
1093 UBool readToken
= FALSE
;
1094 /* added by Jing/GCL */
1097 struct UString memberComments
;
1099 result
= intvector_open(bundle
, tag
, comment
, status
);
1101 if (result
== NULL
|| U_FAILURE(*status
))
1107 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1109 ustr_init(&memberComments
);
1110 /* '{' . string [','] '}' */
1113 ustr_setlen(&memberComments
, 0, status
);
1115 /* check for end of array, but don't consume next token unless it really is the end */
1116 token
= peekToken(0, NULL
, NULL
,&memberComments
, status
);
1118 if (token
== TOK_CLOSE_BRACE
)
1120 /* it's the end, consume the close brace */
1121 getToken(NULL
, NULL
, NULL
, status
);
1123 warning(startline
, "Encountered empty int vector");
1128 string
= getInvariantString(NULL
, NULL
, status
);
1130 if (U_FAILURE(*status
))
1132 intvector_close(result
, status
);
1135 /* Commented by Jing/GCL */
1136 /*value = uprv_strtol(string, NULL, 10);
1137 intvector_add(result, value, status);
1141 token = peekToken(0, NULL, NULL, status);*/
1143 /* The following is added by Jing/GCL to handle illegal char in the Intvector */
1144 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1145 len
=(uint32_t)(stopstring
-string
);
1147 if(len
==uprv_strlen(string
))
1149 intvector_add(result
, value
, status
);
1151 token
= peekToken(0, NULL
, NULL
, NULL
, status
);
1156 *status
=U_INVALID_CHAR_FOUND
;
1158 /* The above is added by Jing/GCL */
1160 if (U_FAILURE(*status
))
1162 intvector_close(result
, status
);
1166 /* the comma is optional (even though it is required to prevent the reader from concatenating
1167 consecutive entries) so that a missing comma on the last entry isn't an error */
1168 if (token
== TOK_COMMA
)
1170 getToken(NULL
, NULL
, NULL
, status
);
1176 /* A compiler warning will appear if all paths don't contain a return statement. */
1177 /* intvector_close(result, status);
1178 *status = U_INTERNAL_PROGRAM_ERROR;
1182 static struct SResource
*
1183 parseBinary(char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1185 struct SResource
*result
= NULL
;
1188 char toConv
[3] = {'\0', '\0', '\0'};
1192 /* added by Jing/GCL */
1196 string
= getInvariantString(&line
, NULL
, status
);
1198 if (string
== NULL
|| U_FAILURE(*status
))
1203 expect(TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1205 if (U_FAILURE(*status
))
1212 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1215 count
= (uint32_t)uprv_strlen(string
);
1218 value
= uprv_malloc(sizeof(uint8_t) * count
);
1223 *status
= U_MEMORY_ALLOCATION_ERROR
;
1227 for (i
= 0; i
< count
; i
+= 2)
1229 toConv
[0] = string
[i
];
1230 toConv
[1] = string
[i
+ 1];
1232 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1233 len
=(uint32_t)(stopstring
-toConv
);
1235 if(len
!=uprv_strlen(toConv
))
1238 *status
=U_INVALID_CHAR_FOUND
;
1243 result
= bin_open(bundle
, tag
, (i
>> 1), value
,NULL
, comment
, status
);
1249 *status
= U_INVALID_CHAR_FOUND
;
1251 error(line
, "Encountered invalid binary string");
1257 result
= bin_open(bundle
, tag
, 0, NULL
, "",comment
,status
);
1258 warning(startline
, "Encountered empty binary tag");
1265 static struct SResource
*
1266 parseInteger(char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1268 struct SResource
*result
= NULL
;
1271 /* added by Jing/GCL */
1275 string
= getInvariantString(NULL
, NULL
, status
);
1277 if (string
== NULL
|| U_FAILURE(*status
))
1282 expect(TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1284 if (U_FAILURE(*status
))
1291 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1294 if (uprv_strlen(string
) <= 0)
1296 warning(startline
, "Encountered empty integer. Default value is 0.");
1299 /* commented by Jing/GCL */
1300 /* value = uprv_strtol(string, NULL, 10);*/
1301 /* result = int_open(bundle, tag, value, status);*/
1302 /* The following is added by Jing/GCL*/
1303 /* to make integer support hexdecimal, octal digit and decimal*/
1304 /* to handle illegal char in the integer*/
1305 value
= uprv_strtoul(string
, &stopstring
, 0);
1306 len
=(uint32_t)(stopstring
-string
);
1307 if(len
==uprv_strlen(string
))
1309 result
= int_open(bundle
, tag
, value
, comment
, status
);
1313 *status
=U_INVALID_CHAR_FOUND
;
1320 static struct SResource
*
1321 parseImport(char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1323 struct SResource
*result
;
1329 char *fullname
= NULL
;
1330 int32_t numRead
= 0;
1331 filename
= getInvariantString(&line
, NULL
, status
);
1333 if (U_FAILURE(*status
))
1338 expect(TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1340 if (U_FAILURE(*status
))
1342 uprv_free(filename
);
1347 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1350 /* Open the input file for reading */
1351 if (inputdir
== NULL
)
1353 file
= T_FileStream_open(filename
, "rb");
1358 int32_t count
= (int32_t)uprv_strlen(filename
);
1360 if (inputdir
[inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1362 fullname
= (char *) uprv_malloc(inputdirLength
+ count
+ 2);
1365 if(fullname
== NULL
)
1367 *status
= U_MEMORY_ALLOCATION_ERROR
;
1371 uprv_strcpy(fullname
, inputdir
);
1373 fullname
[inputdirLength
] = U_FILE_SEP_CHAR
;
1374 fullname
[inputdirLength
+ 1] = '\0';
1376 uprv_strcat(fullname
, filename
);
1380 fullname
= (char *) uprv_malloc(inputdirLength
+ count
+ 1);
1383 if(fullname
== NULL
)
1385 *status
= U_MEMORY_ALLOCATION_ERROR
;
1389 uprv_strcpy(fullname
, inputdir
);
1390 uprv_strcat(fullname
, filename
);
1393 file
= T_FileStream_open(fullname
, "rb");
1399 error(line
, "couldn't open input file %s", filename
);
1400 *status
= U_FILE_ACCESS_ERROR
;
1404 len
= T_FileStream_size(file
);
1405 data
= (uint8_t*)uprv_malloc(len
* sizeof(uint8_t));
1409 *status
= U_MEMORY_ALLOCATION_ERROR
;
1410 T_FileStream_close (file
);
1414 numRead
= T_FileStream_read (file
, data
, len
);
1415 T_FileStream_close (file
);
1417 result
= bin_open(bundle
, tag
, len
, data
, fullname
, comment
, status
);
1420 uprv_free(filename
);
1421 uprv_free(fullname
);
1426 static struct SResource
*
1427 parseInclude(char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1429 struct SResource
*result
;
1433 UChar
*pTarget
= NULL
;
1436 char *fullname
= NULL
;
1438 const char* cp
= NULL
;
1439 const UChar
* uBuffer
= NULL
;
1441 filename
= getInvariantString(&line
, NULL
, status
);
1442 count
= (int32_t)uprv_strlen(filename
);
1444 if (U_FAILURE(*status
))
1449 expect(TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1451 if (U_FAILURE(*status
))
1453 uprv_free(filename
);
1458 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1461 fullname
= (char *) uprv_malloc(inputdirLength
+ count
+ 2);
1463 if(fullname
== NULL
)
1465 *status
= U_MEMORY_ALLOCATION_ERROR
;
1466 uprv_free(filename
);
1471 if (inputdir
[inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1474 uprv_strcpy(fullname
, inputdir
);
1476 fullname
[inputdirLength
] = U_FILE_SEP_CHAR
;
1477 fullname
[inputdirLength
+ 1] = '\0';
1479 uprv_strcat(fullname
, filename
);
1483 uprv_strcpy(fullname
, inputdir
);
1484 uprv_strcat(fullname
, filename
);
1487 uprv_strcpy(fullname
,filename
);
1490 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1492 if (U_FAILURE(*status
)) {
1493 error(line
, "couldn't open input file %s\n", filename
);
1497 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1498 result
= string_open(bundle
, tag
, uBuffer
, len
, comment
, status
);
1502 uprv_free(filename
);
1503 uprv_free(fullname
);
1508 static struct SResource
*
1509 parseResource(char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1511 enum ETokenType token
;
1512 enum EResourceType resType
= RT_UNKNOWN
;
1513 struct UString
*tokenValue
;
1517 token
= getToken(&tokenValue
, NULL
, &startline
, status
);
1520 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1523 /* name . [ ':' type ] '{' resource '}' */
1524 /* This function parses from the colon onwards. If the colon is present, parse the
1525 type then try to parse a resource of that type. If there is no explicit type,
1526 work it out using the lookahead tokens. */
1530 *status
= U_INVALID_FORMAT_ERROR
;
1531 error(startline
, "Unexpected EOF encountered");
1535 *status
= U_INVALID_FORMAT_ERROR
;
1539 resType
= parseResourceType(status
);
1540 expect(TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1542 if (U_FAILURE(*status
))
1549 case TOK_OPEN_BRACE
:
1553 *status
= U_INVALID_FORMAT_ERROR
;
1554 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
1558 if (resType
== RT_UNKNOWN
)
1560 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1561 We could have any of the following:
1562 { { => array (nested)
1564 { string , => string array
1566 commented by Jing/GCL
1571 { string :/{ => table
1572 { string } => string
1575 token
= peekToken(0, NULL
, &line
, NULL
,status
);
1577 if (U_FAILURE(*status
))
1582 /* Commented by Jing/GCL */
1583 /* if (token == TOK_OPEN_BRACE || token == TOK_COLON )*/
1584 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
1588 else if (token
== TOK_STRING
)
1590 token
= peekToken(1, NULL
, &line
, NULL
, status
);
1592 if (U_FAILURE(*status
))
1599 case TOK_COMMA
: resType
= RT_ARRAY
; break;
1600 case TOK_OPEN_BRACE
: resType
= RT_TABLE
; break;
1601 case TOK_CLOSE_BRACE
: resType
= RT_STRING
; break;
1602 /* added by Jing/GCL to make table work when :table is omitted */
1603 case TOK_COLON
: resType
= RT_TABLE
; break;
1605 *status
= U_INVALID_FORMAT_ERROR
;
1606 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
1612 *status
= U_INVALID_FORMAT_ERROR
;
1613 error(line
, "Unexpected token after '{'");
1617 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1620 /* We should now know what we need to parse next, so call the appropriate parser
1621 function and return. */
1624 case RT_STRING
: return parseString (tag
, startline
, comment
, status
);
1625 case RT_TABLE
: return parseTable (tag
, startline
, comment
, status
);
1626 case RT_ARRAY
: return parseArray (tag
, startline
, comment
, status
);
1627 case RT_ALIAS
: return parseAlias (tag
, startline
, comment
, status
);
1628 case RT_BINARY
: return parseBinary (tag
, startline
, comment
, status
);
1629 case RT_INTEGER
: return parseInteger (tag
, startline
, comment
, status
);
1630 case RT_IMPORT
: return parseImport (tag
, startline
, comment
, status
);
1631 case RT_INCLUDE
: return parseInclude (tag
, startline
, comment
, status
);
1632 case RT_INTVECTOR
: return parseIntVector (tag
, startline
, comment
, status
);
1635 *status
= U_INTERNAL_PROGRAM_ERROR
;
1636 error(startline
, "internal error: unknown resource type found and not handled");
1643 parse(UCHARBUF
*buf
, const char *currentInputDir
, UErrorCode
*status
)
1645 struct UString
*tokenValue
;
1646 struct UString comment
;
1648 /* added by Jing/GCL */
1649 enum EResourceType bundleType
;
1650 enum ETokenType token
;
1652 initLookahead(buf
, status
);
1654 inputdir
= currentInputDir
;
1655 inputdirLength
= (inputdir
!= NULL
) ? (uint32_t)uprv_strlen(inputdir
) : 0;
1657 ustr_init(&comment
);
1658 expect(TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
1660 bundle
= bundle_open(&comment
, status
);
1662 if (bundle
== NULL
|| U_FAILURE(*status
))
1668 bundle_setlocale(bundle
, tokenValue
->fChars
, status
);
1669 /* Commented by Jing/GCL */
1670 /* expect(TOK_OPEN_BRACE, NULL, &line, status); */
1671 /* The following code makes an empty bundle work whether or not the :table specifier is present */
1672 token
= getToken(NULL
, NULL
, &line
, status
);
1674 if(token
==TOK_COLON
)
1676 *status
=U_ZERO_ERROR
;
1680 *status
=U_PARSE_ERROR
;
1683 if(U_SUCCESS(*status
)){
1685 bundleType
=parseResourceType(status
);
1687 if(bundleType
==RT_TABLE
)
1689 expect(TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
1693 *status
=U_PARSE_ERROR
;
1694 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
1699 if(token
==TOK_OPEN_BRACE
)
1701 *status
=U_ZERO_ERROR
;
1705 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
1708 /* The above is added by Jing/GCL */
1710 if (U_FAILURE(*status
))
1712 bundle_close(bundle
, status
);
1716 realParseTable(bundle
->fRoot
, NULL
, line
, status
);
1718 if (U_FAILURE(*status
))
1720 bundle_close(bundle
, status
);
1724 if (getToken(NULL
, NULL
, &line
, status
) != TOK_EOF
)
1726 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
1728 *status
= U_INVALID_FORMAT_ERROR
;