2 *******************************************************************************
4 * Copyright (C) 1998-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 # define UNISTR_FROM_CHAR_EXPLICIT explicit
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 # define UNISTR_FROM_STRING_EXPLICIT explicit
42 #include "unicode/ustring.h"
43 #include "unicode/uscript.h"
44 #include "unicode/utf16.h"
45 #include "unicode/putil.h"
46 #include "collationbuilder.h"
47 #include "collationdata.h"
48 #include "collationdatareader.h"
49 #include "collationdatawriter.h"
50 #include "collationfastlatinbuilder.h"
51 #include "collationinfo.h"
52 #include "collationroot.h"
53 #include "collationruleparser.h"
54 #include "collationtailoring.h"
57 /* Number of tokens to read ahead of the current stream position */
58 #define MAX_LOOKAHEAD 3
68 #define STARTCOMMAND 0x005B
69 #define ENDCOMMAND 0x005D
70 #define OPENSQBRACKET 0x005B
71 #define CLOSESQBRACKET 0x005D
73 using icu::LocalPointer
;
74 using icu::UnicodeString
;
80 struct UString comment
;
84 /* keep in sync with token defines in read.h */
85 const char *tokenNames
[TOK_TOKEN_COUNT
] =
87 "string", /* A string token, such as "MonthNames" */
88 "'{'", /* An opening brace character */
89 "'}'", /* A closing brace character */
93 "<end of file>", /* End of the file has been reached successfully */
97 /* Just to store "TRUE" */
98 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
101 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
102 uint32_t lookaheadPosition
;
104 struct SRBRoot
*bundle
;
105 const char *inputdir
;
106 uint32_t inputdirLength
;
107 const char *outputdir
;
108 uint32_t outputdirLength
;
109 const char *filename
;
110 UBool makeBinaryCollation
;
111 UBool omitCollationRules
;
114 typedef struct SResource
*
115 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
117 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
119 /* The nature of the lookahead buffer:
120 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
121 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
122 When getToken is called, the current pointer is moved to the next slot and the
123 old slot is filled with the next token from the reader by calling getNextToken.
124 The token values are stored in the slot, which means that token values don't
125 survive a call to getToken, ie.
129 getToken(&value, NULL, status);
130 getToken(NULL, NULL, status); bad - value is now a different string
133 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
135 static uint32_t initTypeStrings
= 0;
138 if (!initTypeStrings
)
143 state
->lookaheadPosition
= 0;
148 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
150 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
151 if (U_FAILURE(*status
))
157 *status
= U_ZERO_ERROR
;
161 cleanupLookahead(ParseState
* state
)
164 for (i
= 0; i
<= MAX_LOOKAHEAD
; i
++)
166 ustr_deinit(&state
->lookahead
[i
].value
);
167 ustr_deinit(&state
->lookahead
[i
].comment
);
172 static enum ETokenType
173 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
175 enum ETokenType result
;
178 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
180 if (tokenValue
!= NULL
)
182 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
185 if (linenumber
!= NULL
)
187 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
192 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
195 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
196 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
197 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
198 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
199 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
201 /* printf("getToken, returning %s\n", tokenNames[result]); */
206 static enum ETokenType
207 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
209 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
211 if (U_FAILURE(*status
))
216 if (lookaheadCount
>= MAX_LOOKAHEAD
)
218 *status
= U_INTERNAL_PROGRAM_ERROR
;
222 if (tokenValue
!= NULL
)
224 *tokenValue
= &state
->lookahead
[i
].value
;
227 if (linenumber
!= NULL
)
229 *linenumber
= state
->lookahead
[i
].line
;
233 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
236 return state
->lookahead
[i
].type
;
240 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
244 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
246 if (linenumber
!= NULL
)
251 if (U_FAILURE(*status
))
256 if (token
!= expectedToken
)
258 *status
= U_INVALID_FORMAT_ERROR
;
259 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
263 *status
= U_ZERO_ERROR
;
267 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
269 struct UString
*tokenValue
;
273 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
275 if (U_FAILURE(*status
))
280 count
= u_strlen(tokenValue
->fChars
);
281 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
282 *status
= U_INVALID_FORMAT_ERROR
;
283 error(*line
, "invariant characters required for table keys, binary data, etc.");
287 result
= static_cast<char *>(uprv_malloc(count
+1));
291 *status
= U_MEMORY_ALLOCATION_ERROR
;
295 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
299 static struct SResource
*
300 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
302 struct SResource
*result
= NULL
;
303 struct UString
*tokenValue
;
304 FileStream
*file
= NULL
;
305 char filename
[256] = { '\0' };
306 char cs
[128] = { '\0' };
308 UBool quoted
= FALSE
;
309 UCHARBUF
*ucbuf
=NULL
;
311 const char* cp
= NULL
;
312 UChar
*pTarget
= NULL
;
313 UChar
*target
= NULL
;
314 UChar
*targetLimit
= NULL
;
317 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
320 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
323 if (U_FAILURE(*status
))
327 /* make the filename including the directory */
328 if (state
->inputdir
!= NULL
)
330 uprv_strcat(filename
, state
->inputdir
);
332 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
334 uprv_strcat(filename
, U_FILE_SEP_STRING
);
338 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
340 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
342 if (U_FAILURE(*status
))
346 uprv_strcat(filename
, cs
);
348 if(state
->omitCollationRules
) {
352 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
354 if (U_FAILURE(*status
)) {
355 error(line
, "An error occured while opening the input file %s\n", filename
);
359 /* We allocate more space than actually required
360 * since the actual size needed for storing UChars
361 * is not known in UTF-8 byte stream
363 size
= ucbuf_size(ucbuf
) + 1;
364 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
365 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
367 targetLimit
= pTarget
+size
;
369 /* read the rules into the buffer */
370 while (target
< targetLimit
)
372 c
= ucbuf_getc(ucbuf
, status
);
374 quoted
= (UBool
)!quoted
;
376 /* weiv (06/26/2002): adding the following:
377 * - preserving spaces in commands [...]
378 * - # comments until the end of line
380 if (c
== STARTCOMMAND
&& !quoted
)
383 * closing bracket will be handled by the
384 * append at the end of the loop
386 while(c
!= ENDCOMMAND
) {
387 U_APPEND_CHAR32_ONLY(c
, target
);
388 c
= ucbuf_getc(ucbuf
, status
);
391 else if (c
== HASH
&& !quoted
) {
393 while(c
!= CR
&& c
!= LF
) {
394 c
= ucbuf_getc(ucbuf
, status
);
398 else if (c
== ESCAPE
)
400 c
= unescape(ucbuf
, status
);
402 if (c
== (UChar32
)U_ERR
)
405 T_FileStream_close(file
);
409 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
411 /* ignore spaces carriage returns
412 * and line feed unless in the form \uXXXX
417 /* Append UChar * after dissembling if c > 0xffff*/
418 if (c
!= (UChar32
)U_EOF
)
420 U_APPEND_CHAR32_ONLY(c
, target
);
428 /* terminate the string */
429 if(target
< targetLimit
){
433 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
438 T_FileStream_close(file
);
443 static struct SResource
*
444 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
446 struct SResource
*result
= NULL
;
447 struct UString
*tokenValue
;
448 FileStream
*file
= NULL
;
449 char filename
[256] = { '\0' };
450 char cs
[128] = { '\0' };
452 UCHARBUF
*ucbuf
=NULL
;
453 const char* cp
= NULL
;
454 UChar
*pTarget
= NULL
;
455 const UChar
*pSource
= NULL
;
458 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
461 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
464 if (U_FAILURE(*status
))
468 /* make the filename including the directory */
469 if (state
->inputdir
!= NULL
)
471 uprv_strcat(filename
, state
->inputdir
);
473 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
475 uprv_strcat(filename
, U_FILE_SEP_STRING
);
479 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
481 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
483 if (U_FAILURE(*status
))
487 uprv_strcat(filename
, cs
);
490 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
492 if (U_FAILURE(*status
)) {
493 error(line
, "An error occured while opening the input file %s\n", filename
);
497 /* We allocate more space than actually required
498 * since the actual size needed for storing UChars
499 * is not known in UTF-8 byte stream
501 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
502 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
503 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
505 #if !UCONFIG_NO_TRANSLITERATION
506 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
509 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
511 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
515 T_FileStream_close(file
);
519 static struct SResource
* dependencyArray
= NULL
;
521 static struct SResource
*
522 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
524 struct SResource
*result
= NULL
;
525 struct SResource
*elem
= NULL
;
526 struct UString
*tokenValue
;
528 char filename
[256] = { '\0' };
529 char cs
[128] = { '\0' };
531 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
534 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
537 if (U_FAILURE(*status
))
541 /* make the filename including the directory */
542 if (state
->outputdir
!= NULL
)
544 uprv_strcat(filename
, state
->outputdir
);
546 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
548 uprv_strcat(filename
, U_FILE_SEP_STRING
);
552 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
554 if (U_FAILURE(*status
))
558 uprv_strcat(filename
, cs
);
559 if(!T_FileStream_file_exists(filename
)){
561 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
563 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
566 if(dependencyArray
==NULL
){
567 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
570 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
572 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
574 array_add(dependencyArray
, elem
, status
);
576 if (U_FAILURE(*status
))
580 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
583 static struct SResource
*
584 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
586 struct UString
*tokenValue
;
587 struct SResource
*result
= NULL
;
589 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
591 return parseUCARules(tag, startline, status);
594 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
596 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
598 if (U_SUCCESS(*status
))
600 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601 doesn't survive expect either) */
603 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
604 if(U_SUCCESS(*status
) && result
) {
605 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
607 if (U_FAILURE(*status
))
618 static struct SResource
*
619 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
621 struct UString
*tokenValue
;
622 struct SResource
*result
= NULL
;
624 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
627 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
630 if (U_SUCCESS(*status
))
632 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633 doesn't survive expect either) */
635 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
637 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
639 if (U_FAILURE(*status
))
649 #if !UCONFIG_NO_COLLATION
653 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
654 struct SResource
*current
= NULL
;
655 struct SResTable
*list
;
656 if (res
== res_none()) {
660 list
= &(res
->u
.fTable
);
662 current
= list
->fFirst
;
663 while (current
!= NULL
) {
664 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
667 current
= current
->fNext
;
672 class GenrbImporter
: public icu::CollationRuleParser::Importer
{
674 GenrbImporter(const char *in
, const char *out
) : inputDir(in
), outputDir(out
) {}
675 virtual ~GenrbImporter();
676 virtual const UnicodeString
*getRules(
677 const char *localeID
, const char *collationType
,
678 const char *&errorReason
, UErrorCode
&errorCode
);
681 const char *inputDir
;
682 const char *outputDir
;
686 GenrbImporter::~GenrbImporter() {}
688 const UnicodeString
*
689 GenrbImporter::getRules(
690 const char *localeID
, const char *collationType
,
691 const char *& /*errorReason*/, UErrorCode
&errorCode
) {
692 struct SRBRoot
*data
= NULL
;
693 UCHARBUF
*ucbuf
= NULL
;
694 int localeLength
= strlen(localeID
);
695 char* filename
= (char*)uprv_malloc(localeLength
+5);
696 char *inputDirBuf
= NULL
;
697 char *openFileName
= NULL
;
702 struct SResource
* root
;
703 struct SResource
* collations
;
704 struct SResource
* collation
;
705 struct SResource
* sequence
;
707 memcpy(filename
, localeID
, localeLength
);
708 for(i
= 0; i
< localeLength
; i
++){
709 if(filename
[i
] == '-'){
713 filename
[localeLength
] = '.';
714 filename
[localeLength
+1] = 't';
715 filename
[localeLength
+2] = 'x';
716 filename
[localeLength
+3] = 't';
717 filename
[localeLength
+4] = 0;
720 if (U_FAILURE(errorCode
)) {
724 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
727 filelen
= (int32_t)uprv_strlen(filename
);
729 if(inputDir
== NULL
) {
730 const char *filenameBegin
= uprv_strrchr(filename
, U_FILE_SEP_CHAR
);
731 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
732 openFileName
[0] = '\0';
733 if (filenameBegin
!= NULL
) {
735 * When a filename ../../../data/root.txt is specified,
736 * we presume that the input directory is ../../../data
737 * This is very important when the resource file includes
738 * another file, like UCARules.txt or thaidict.brk.
740 int32_t filenameSize
= (int32_t)(filenameBegin
- filename
+ 1);
741 inputDirBuf
= (char *)uprv_malloc(filenameSize
);
744 if(inputDirBuf
== NULL
) {
745 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
749 uprv_strncpy(inputDirBuf
, filename
, filenameSize
);
750 inputDirBuf
[filenameSize
- 1] = 0;
751 inputDir
= inputDirBuf
;
752 dirlen
= (int32_t)uprv_strlen(inputDir
);
755 dirlen
= (int32_t)uprv_strlen(inputDir
);
757 if(inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
758 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
761 if(openFileName
== NULL
) {
762 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
766 openFileName
[0] = '\0';
768 * append the input dir to openFileName if the first char in
769 * filename is not file seperation char and the last char input directory is not '.'.
770 * This is to support :
771 * genrb -s. /home/icu/data
773 * The user cannot mix notations like
774 * genrb -s. /icu/data --- the absolute path specified. -s redundant
776 * genrb -s. icu/data --- start from CWD and look in icu/data dir
778 if( (filename
[0] != U_FILE_SEP_CHAR
) && (inputDir
[dirlen
-1] !='.')){
779 uprv_strcpy(openFileName
, inputDir
);
780 openFileName
[dirlen
] = U_FILE_SEP_CHAR
;
782 openFileName
[dirlen
+ 1] = '\0';
784 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 1);
787 if(openFileName
== NULL
) {
788 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
792 uprv_strcpy(openFileName
, inputDir
);
796 uprv_strcat(openFileName
, filename
);
797 /* printf("%s\n", openFileName); */
798 errorCode
= U_ZERO_ERROR
;
799 ucbuf
= ucbuf_open(openFileName
, &cp
,getShowWarning(),TRUE
, &errorCode
);
801 if(errorCode
== U_FILE_ACCESS_ERROR
) {
803 fprintf(stderr
, "couldn't open file %s\n", openFileName
== NULL
? filename
: openFileName
);
806 if (ucbuf
== NULL
|| U_FAILURE(errorCode
)) {
807 fprintf(stderr
, "An error occured processing file %s. Error: %s\n", openFileName
== NULL
? filename
: openFileName
,u_errorName(errorCode
));
811 /* Parse the data into an SRBRoot */
812 data
= parse(ucbuf
, inputDir
, outputDir
, filename
, FALSE
, FALSE
, &errorCode
);
815 collations
= resLookup(root
, "collations");
816 if (collations
!= NULL
) {
817 collation
= resLookup(collations
, collationType
);
818 if (collation
!= NULL
) {
819 sequence
= resLookup(collation
, "Sequence");
820 if (sequence
!= NULL
) {
821 rules
.setTo(FALSE
, sequence
->u
.fString
.fChars
, sequence
->u
.fString
.fLength
);
827 if (inputDirBuf
!= NULL
) {
828 uprv_free(inputDirBuf
);
831 if (openFileName
!= NULL
) {
832 uprv_free(openFileName
);
842 // Quick-and-dirty escaping function.
843 // Assumes that we are on an ASCII-based platform.
845 escape(const UChar
*s
, char *buffer
) {
846 int32_t length
= u_strlen(s
);
850 U16_NEXT(s
, i
, length
, c
);
854 } else if (0x20 <= c
&& c
<= 0x7e) {
856 *buffer
++ = (char)c
; // assumes ASCII-based platform
858 buffer
+= sprintf(buffer
, "\\u%04X", (int)c
);
865 #endif // !UCONFIG_NO_COLLATION
867 static struct SResource
*
868 addCollation(ParseState
* state
, struct SResource
*result
, const char *collationType
,
869 uint32_t startline
, UErrorCode
*status
)
871 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
872 struct SResource
*member
= NULL
;
873 struct UString
*tokenValue
;
874 struct UString comment
;
875 enum ETokenType token
;
878 UBool haveRules
= FALSE
;
879 UVersionInfo version
;
882 /* '{' . (name resource)* '}' */
883 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
888 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
890 if (token
== TOK_CLOSE_BRACE
)
895 if (token
!= TOK_STRING
)
898 *status
= U_INVALID_FORMAT_ERROR
;
900 if (token
== TOK_EOF
)
902 error(startline
, "unterminated table");
906 error(line
, "Unexpected token %s", tokenNames
[token
]);
912 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
914 if (U_FAILURE(*status
))
920 member
= parseResource(state
, subtag
, NULL
, status
);
922 if (U_FAILURE(*status
))
929 // Ignore the parsed resources, continue parsing.
931 else if (uprv_strcmp(subtag
, "Version") == 0)
934 int32_t length
= member
->u
.fString
.fLength
;
936 if (length
>= (int32_t) sizeof(ver
))
938 length
= (int32_t) sizeof(ver
) - 1;
941 u_UCharsToChars(member
->u
.fString
.fChars
, ver
, length
+ 1); /* +1 for copying NULL */
942 u_versionFromString(version
, ver
);
944 table_add(result
, member
, line
, status
);
947 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
949 /* discard duplicate %%CollationBin if any*/
951 else if (uprv_strcmp(subtag
, "Sequence") == 0)
953 rules
.setTo(member
->u
.fString
.fChars
, member
->u
.fString
.fLength
);
955 // Defer building the collator until we have seen
956 // all sub-elements of the collation table, including the Version.
957 /* in order to achieve smaller data files, we can direct genrb */
958 /* to omit collation rules */
959 if(!state
->omitCollationRules
) {
960 table_add(result
, member
, line
, status
);
964 else // Just copy non-special items.
966 table_add(result
, member
, line
, status
);
969 res_close(member
); // TODO: use LocalPointer
970 if (U_FAILURE(*status
))
977 if (!haveRules
) { return result
; }
979 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
980 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
983 if(!state
->makeBinaryCollation
) {
985 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
989 UErrorCode intStatus
= U_ZERO_ERROR
;
990 UParseError parseError
;
991 uprv_memset(&parseError
, 0, sizeof(parseError
));
992 GenrbImporter
importer(state
->inputdir
, state
->outputdir
);
993 const icu::CollationTailoring
*base
= icu::CollationRoot::getRoot(intStatus
);
994 if(U_FAILURE(intStatus
)) {
995 error(line
, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus
));
997 return NULL
; // TODO: use LocalUResourceBundlePointer for result
999 icu::CollationBuilder
builder(base
, intStatus
);
1000 if(uprv_strncmp(collationType
, "search", 6) == 0) {
1001 builder
.disableFastLatin(); // build fast-Latin table unless search collator
1003 LocalPointer
<icu::CollationTailoring
> t(
1004 builder
.parseAndBuild(rules
, version
, &importer
, &parseError
, intStatus
));
1005 if(U_FAILURE(intStatus
)) {
1006 const char *reason
= builder
.getErrorReason();
1007 if(reason
== NULL
) { reason
= ""; }
1008 error(line
, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
1009 state
->filename
, collationType
,
1010 (long)parseError
.offset
, u_errorName(intStatus
), reason
);
1011 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
1012 // Print pre- and post-context.
1013 char preBuffer
[100], postBuffer
[100];
1014 escape(parseError
.preContext
, preBuffer
);
1015 escape(parseError
.postContext
, postBuffer
);
1016 error(line
, " error context: \"...%s\" ! \"%s...\"", preBuffer
, postBuffer
);
1019 *status
= intStatus
;
1024 icu::LocalMemory
<uint8_t> buffer
;
1025 int32_t capacity
= 100000;
1026 uint8_t *dest
= buffer
.allocateInsteadAndCopy(capacity
);
1028 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
1030 *status
= U_MEMORY_ALLOCATION_ERROR
;
1034 int32_t indexes
[icu::CollationDataReader::IX_TOTAL_SIZE
+ 1];
1035 int32_t totalSize
= icu::CollationDataWriter::writeTailoring(
1036 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
1037 if(intStatus
== U_BUFFER_OVERFLOW_ERROR
) {
1038 intStatus
= U_ZERO_ERROR
;
1039 capacity
= totalSize
;
1040 dest
= buffer
.allocateInsteadAndCopy(capacity
);
1042 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
1044 *status
= U_MEMORY_ALLOCATION_ERROR
;
1048 totalSize
= icu::CollationDataWriter::writeTailoring(
1049 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
1051 if(U_FAILURE(intStatus
)) {
1052 fprintf(stderr
, "CollationDataWriter::writeTailoring() failed: %s\n",
1053 u_errorName(intStatus
));
1058 printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
);
1059 icu::CollationInfo::printSizes(totalSize
, indexes
);
1061 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", totalSize
, dest
, NULL
, NULL
, status
);
1062 table_add(result
, collationBin
, line
, status
);
1063 if (U_FAILURE(*status
)) {
1072 keepCollationType(const char *type
) {
1073 return gIncludeUnihanColl
|| uprv_strcmp(type
, "unihan") != 0;
1076 static struct SResource
*
1077 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1079 struct SResource
*result
= NULL
;
1080 struct SResource
*member
= NULL
;
1081 struct SResource
*collationRes
= NULL
;
1082 struct UString
*tokenValue
;
1083 struct UString comment
;
1084 enum ETokenType token
;
1085 char subtag
[1024], typeKeyword
[1024];
1088 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1090 if (result
== NULL
|| U_FAILURE(*status
))
1095 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1098 return addCollation(state
, result
, "(no type)", startline
, status
);
1102 ustr_init(&comment
);
1103 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1105 if (token
== TOK_CLOSE_BRACE
)
1110 if (token
!= TOK_STRING
)
1113 *status
= U_INVALID_FORMAT_ERROR
;
1115 if (token
== TOK_EOF
)
1117 error(startline
, "unterminated table");
1121 error(line
, "Unexpected token %s", tokenNames
[token
]);
1127 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1129 if (U_FAILURE(*status
))
1135 if (uprv_strcmp(subtag
, "default") == 0)
1137 member
= parseResource(state
, subtag
, NULL
, status
);
1139 if (U_FAILURE(*status
))
1145 table_add(result
, member
, line
, status
);
1149 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1150 /* this probably needs to be refactored or recursively use the parser */
1151 /* first we assume that our collation table won't have the explicit type */
1152 /* then, we cannot handle aliases */
1153 if(token
== TOK_OPEN_BRACE
) {
1154 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1155 if (keepCollationType(subtag
)) {
1156 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1158 collationRes
= NULL
;
1160 // need to parse the collation data regardless
1161 collationRes
= addCollation(state
, collationRes
, subtag
, startline
, status
);
1162 if (collationRes
!= NULL
) {
1163 table_add(result
, collationRes
, startline
, status
);
1165 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1166 /* we could have a table too */
1167 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1168 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1169 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1170 member
= parseResource(state
, subtag
, NULL
, status
);
1171 if (U_FAILURE(*status
))
1177 table_add(result
, member
, line
, status
);
1180 *status
= U_INVALID_FORMAT_ERROR
;
1185 *status
= U_INVALID_FORMAT_ERROR
;
1190 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1192 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1194 if (U_FAILURE(*status
))
1203 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1204 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1205 static struct SResource
*
1206 realParseTable(ParseState
* state
, struct SResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1208 struct SResource
*member
= NULL
;
1209 struct UString
*tokenValue
=NULL
;
1210 struct UString comment
;
1211 enum ETokenType token
;
1214 UBool readToken
= FALSE
;
1216 /* '{' . (name resource)* '}' */
1219 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1223 ustr_init(&comment
);
1224 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1226 if (token
== TOK_CLOSE_BRACE
)
1229 warning(startline
, "Encountered empty table");
1234 if (token
!= TOK_STRING
)
1236 *status
= U_INVALID_FORMAT_ERROR
;
1238 if (token
== TOK_EOF
)
1240 error(startline
, "unterminated table");
1244 error(line
, "unexpected token %s", tokenNames
[token
]);
1250 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
1251 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1253 *status
= U_INVALID_FORMAT_ERROR
;
1254 error(line
, "invariant characters required for table keys");
1258 if (U_FAILURE(*status
))
1260 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
1264 member
= parseResource(state
, subtag
, &comment
, status
);
1266 if (member
== NULL
|| U_FAILURE(*status
))
1268 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1272 table_add(table
, member
, line
, status
);
1274 if (U_FAILURE(*status
))
1276 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1280 ustr_deinit(&comment
);
1284 /* A compiler warning will appear if all paths don't contain a return statement. */
1285 /* *status = U_INTERNAL_PROGRAM_ERROR;
1289 static struct SResource
*
1290 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1292 struct SResource
*result
;
1294 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1296 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1298 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1300 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1303 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1306 result
= table_open(state
->bundle
, tag
, comment
, status
);
1308 if (result
== NULL
|| U_FAILURE(*status
))
1312 return realParseTable(state
, result
, tag
, startline
, status
);
1315 static struct SResource
*
1316 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1318 struct SResource
*result
= NULL
;
1319 struct SResource
*member
= NULL
;
1320 struct UString
*tokenValue
;
1321 struct UString memberComments
;
1322 enum ETokenType token
;
1323 UBool readToken
= FALSE
;
1325 result
= array_open(state
->bundle
, tag
, comment
, status
);
1327 if (result
== NULL
|| U_FAILURE(*status
))
1332 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1335 ustr_init(&memberComments
);
1337 /* '{' . resource [','] '}' */
1341 ustr_setlen(&memberComments
, 0, status
);
1343 /* check for end of array, but don't consume next token unless it really is the end */
1344 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1347 if (token
== TOK_CLOSE_BRACE
)
1349 getToken(state
, NULL
, NULL
, NULL
, status
);
1351 warning(startline
, "Encountered empty array");
1356 if (token
== TOK_EOF
)
1359 *status
= U_INVALID_FORMAT_ERROR
;
1360 error(startline
, "unterminated array");
1364 /* string arrays are a special case */
1365 if (token
== TOK_STRING
)
1367 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
1368 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1372 member
= parseResource(state
, NULL
, &memberComments
, status
);
1375 if (member
== NULL
|| U_FAILURE(*status
))
1381 array_add(result
, member
, status
);
1383 if (U_FAILURE(*status
))
1389 /* eat optional comma if present */
1390 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1392 if (token
== TOK_COMMA
)
1394 getToken(state
, NULL
, NULL
, NULL
, status
);
1397 if (U_FAILURE(*status
))
1405 ustr_deinit(&memberComments
);
1409 static struct SResource
*
1410 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1412 struct SResource
*result
= NULL
;
1413 enum ETokenType token
;
1416 UBool readToken
= FALSE
;
1419 struct UString memberComments
;
1421 result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1423 if (result
== NULL
|| U_FAILURE(*status
))
1429 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1431 ustr_init(&memberComments
);
1432 /* '{' . string [','] '}' */
1435 ustr_setlen(&memberComments
, 0, status
);
1437 /* check for end of array, but don't consume next token unless it really is the end */
1438 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1440 if (token
== TOK_CLOSE_BRACE
)
1442 /* it's the end, consume the close brace */
1443 getToken(state
, NULL
, NULL
, NULL
, status
);
1445 warning(startline
, "Encountered empty int vector");
1447 ustr_deinit(&memberComments
);
1451 string
= getInvariantString(state
, NULL
, NULL
, status
);
1453 if (U_FAILURE(*status
))
1459 /* For handling illegal char in the Intvector */
1460 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1461 len
=(uint32_t)(stopstring
-string
);
1463 if(len
==uprv_strlen(string
))
1465 intvector_add(result
, value
, status
);
1467 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1472 *status
=U_INVALID_CHAR_FOUND
;
1475 if (U_FAILURE(*status
))
1481 /* the comma is optional (even though it is required to prevent the reader from concatenating
1482 consecutive entries) so that a missing comma on the last entry isn't an error */
1483 if (token
== TOK_COMMA
)
1485 getToken(state
, NULL
, NULL
, NULL
, status
);
1491 /* A compiler warning will appear if all paths don't contain a return statement. */
1492 /* intvector_close(result, status);
1493 *status = U_INTERNAL_PROGRAM_ERROR;
1497 static struct SResource
*
1498 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1500 struct SResource
*result
= NULL
;
1503 char toConv
[3] = {'\0', '\0', '\0'};
1510 string
= getInvariantString(state
, &line
, NULL
, status
);
1512 if (string
== NULL
|| U_FAILURE(*status
))
1517 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1519 if (U_FAILURE(*status
))
1526 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1529 count
= (uint32_t)uprv_strlen(string
);
1532 value
= static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count
));
1537 *status
= U_MEMORY_ALLOCATION_ERROR
;
1541 for (i
= 0; i
< count
; i
+= 2)
1543 toConv
[0] = string
[i
];
1544 toConv
[1] = string
[i
+ 1];
1546 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1547 len
=(uint32_t)(stopstring
-toConv
);
1549 if(len
!=uprv_strlen(toConv
))
1552 *status
=U_INVALID_CHAR_FOUND
;
1557 result
= bin_open(state
->bundle
, tag
, (i
>> 1), value
,NULL
, comment
, status
);
1563 *status
= U_INVALID_CHAR_FOUND
;
1565 error(line
, "Encountered invalid binary string");
1571 result
= bin_open(state
->bundle
, tag
, 0, NULL
, "",comment
,status
);
1572 warning(startline
, "Encountered empty binary tag");
1579 static struct SResource
*
1580 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1582 struct SResource
*result
= NULL
;
1588 string
= getInvariantString(state
, NULL
, NULL
, status
);
1590 if (string
== NULL
|| U_FAILURE(*status
))
1595 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1597 if (U_FAILURE(*status
))
1604 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1607 if (uprv_strlen(string
) <= 0)
1609 warning(startline
, "Encountered empty integer. Default value is 0.");
1612 /* Allow integer support for hexdecimal, octal digit and decimal*/
1613 /* and handle illegal char in the integer*/
1614 value
= uprv_strtoul(string
, &stopstring
, 0);
1615 len
=(uint32_t)(stopstring
-string
);
1616 if(len
==uprv_strlen(string
))
1618 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1622 *status
=U_INVALID_CHAR_FOUND
;
1629 static struct SResource
*
1630 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1632 struct SResource
*result
;
1638 char *fullname
= NULL
;
1639 filename
= getInvariantString(state
, &line
, NULL
, status
);
1641 if (U_FAILURE(*status
))
1646 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1648 if (U_FAILURE(*status
))
1650 uprv_free(filename
);
1655 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1658 /* Open the input file for reading */
1659 if (state
->inputdir
== NULL
)
1663 * Always save file file name, even if there's
1664 * no input directory specified. MIGHT BREAK SOMETHING
1666 int32_t filenameLength
= uprv_strlen(filename
);
1668 fullname
= (char *) uprv_malloc(filenameLength
+ 1);
1669 uprv_strcpy(fullname
, filename
);
1672 file
= T_FileStream_open(filename
, "rb");
1677 int32_t count
= (int32_t)uprv_strlen(filename
);
1679 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1681 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1684 if(fullname
== NULL
)
1686 *status
= U_MEMORY_ALLOCATION_ERROR
;
1690 uprv_strcpy(fullname
, state
->inputdir
);
1692 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1693 fullname
[state
->inputdirLength
+ 1] = '\0';
1695 uprv_strcat(fullname
, filename
);
1699 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 1);
1702 if(fullname
== NULL
)
1704 *status
= U_MEMORY_ALLOCATION_ERROR
;
1708 uprv_strcpy(fullname
, state
->inputdir
);
1709 uprv_strcat(fullname
, filename
);
1712 file
= T_FileStream_open(fullname
, "rb");
1718 error(line
, "couldn't open input file %s", filename
);
1719 *status
= U_FILE_ACCESS_ERROR
;
1723 len
= T_FileStream_size(file
);
1724 data
= (uint8_t*)uprv_malloc(len
* sizeof(uint8_t));
1728 *status
= U_MEMORY_ALLOCATION_ERROR
;
1729 T_FileStream_close (file
);
1733 /* int32_t numRead = */ T_FileStream_read (file
, data
, len
);
1734 T_FileStream_close (file
);
1736 result
= bin_open(state
->bundle
, tag
, len
, data
, fullname
, comment
, status
);
1739 uprv_free(filename
);
1740 uprv_free(fullname
);
1745 static struct SResource
*
1746 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1748 struct SResource
*result
;
1752 UChar
*pTarget
= NULL
;
1755 char *fullname
= NULL
;
1757 const char* cp
= NULL
;
1758 const UChar
* uBuffer
= NULL
;
1760 filename
= getInvariantString(state
, &line
, NULL
, status
);
1761 count
= (int32_t)uprv_strlen(filename
);
1763 if (U_FAILURE(*status
))
1768 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1770 if (U_FAILURE(*status
))
1772 uprv_free(filename
);
1777 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1780 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1782 if(fullname
== NULL
)
1784 *status
= U_MEMORY_ALLOCATION_ERROR
;
1785 uprv_free(filename
);
1789 if(state
->inputdir
!=NULL
){
1790 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1793 uprv_strcpy(fullname
, state
->inputdir
);
1795 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1796 fullname
[state
->inputdirLength
+ 1] = '\0';
1798 uprv_strcat(fullname
, filename
);
1802 uprv_strcpy(fullname
, state
->inputdir
);
1803 uprv_strcat(fullname
, filename
);
1806 uprv_strcpy(fullname
,filename
);
1809 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1811 if (U_FAILURE(*status
)) {
1812 error(line
, "couldn't open input file %s\n", filename
);
1816 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1817 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
1823 uprv_free(filename
);
1824 uprv_free(fullname
);
1833 U_STRING_DECL(k_type_string
, "string", 6);
1834 U_STRING_DECL(k_type_binary
, "binary", 6);
1835 U_STRING_DECL(k_type_bin
, "bin", 3);
1836 U_STRING_DECL(k_type_table
, "table", 5);
1837 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1838 U_STRING_DECL(k_type_int
, "int", 3);
1839 U_STRING_DECL(k_type_integer
, "integer", 7);
1840 U_STRING_DECL(k_type_array
, "array", 5);
1841 U_STRING_DECL(k_type_alias
, "alias", 5);
1842 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1843 U_STRING_DECL(k_type_import
, "import", 6);
1844 U_STRING_DECL(k_type_include
, "include", 7);
1846 /* Various non-standard processing plugins that create one or more special resources. */
1847 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1848 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1849 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1850 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/* Resource types recognised by the parser. The values index gResourceTypes,
 * so the order here must match the order of the rows in that table. */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED
} EResourceType;
1873 const char *nameChars
; /* only used for debugging */
1874 const UChar
*nameUChars
;
1875 ParseResourceFunction
*parseFunction
;
1876 } gResourceTypes
[] = {
1877 {"Unknown", NULL
, NULL
},
1878 {"string", k_type_string
, parseString
},
1879 {"binary", k_type_binary
, parseBinary
},
1880 {"table", k_type_table
, parseTable
},
1881 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1882 {"integer", k_type_integer
, parseInteger
},
1883 {"array", k_type_array
, parseArray
},
1884 {"alias", k_type_alias
, parseAlias
},
1885 {"intvector", k_type_intvector
, parseIntVector
},
1886 {"import", k_type_import
, parseImport
},
1887 {"include", k_type_include
, parseInclude
},
1888 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1889 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1890 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1891 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
1892 {"reserved", NULL
, NULL
}
1897 U_STRING_INIT(k_type_string
, "string", 6);
1898 U_STRING_INIT(k_type_binary
, "binary", 6);
1899 U_STRING_INIT(k_type_bin
, "bin", 3);
1900 U_STRING_INIT(k_type_table
, "table", 5);
1901 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1902 U_STRING_INIT(k_type_int
, "int", 3);
1903 U_STRING_INIT(k_type_integer
, "integer", 7);
1904 U_STRING_INIT(k_type_array
, "array", 5);
1905 U_STRING_INIT(k_type_alias
, "alias", 5);
1906 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1907 U_STRING_INIT(k_type_import
, "import", 6);
1908 U_STRING_INIT(k_type_include
, "include", 7);
1910 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1911 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1912 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1913 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
1916 static inline UBool
isTable(enum EResourceType type
) {
1917 return (UBool
)(type
==RESTYPE_TABLE
|| type
==RESTYPE_TABLE_NO_FALLBACK
);
1920 static enum EResourceType
1921 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1923 struct UString
*tokenValue
;
1924 struct UString comment
;
1925 enum EResourceType result
= RESTYPE_UNKNOWN
;
1927 ustr_init(&comment
);
1928 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1930 if (U_FAILURE(*status
))
1932 return RESTYPE_UNKNOWN
;
1935 *status
= U_ZERO_ERROR
;
1937 /* Search for normal types */
1938 result
=RESTYPE_UNKNOWN
;
1939 while ((result
=(EResourceType
)(result
+1)) < RESTYPE_RESERVED
) {
1940 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1944 /* Now search for the aliases */
1945 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1946 result
= RESTYPE_INTEGER
;
1948 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1949 result
= RESTYPE_BINARY
;
1951 else if (result
== RESTYPE_RESERVED
) {
1952 char tokenBuffer
[1024];
1953 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
1954 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1955 *status
= U_INVALID_FORMAT_ERROR
;
1956 error(line
, "unknown resource type '%s'", tokenBuffer
);
1962 /* parse a non-top-level resource */
1963 static struct SResource
*
1964 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1966 enum ETokenType token
;
1967 enum EResourceType resType
= RESTYPE_UNKNOWN
;
1968 ParseResourceFunction
*parseFunction
= NULL
;
1969 struct UString
*tokenValue
;
1974 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1977 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1980 /* name . [ ':' type ] '{' resource '}' */
1981 /* This function parses from the colon onwards. If the colon is present, parse the
1982 type then try to parse a resource of that type. If there is no explicit type,
1983 work it out using the lookahead tokens. */
1987 *status
= U_INVALID_FORMAT_ERROR
;
1988 error(startline
, "Unexpected EOF encountered");
1992 *status
= U_INVALID_FORMAT_ERROR
;
1996 resType
= parseResourceType(state
, status
);
1997 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1999 if (U_FAILURE(*status
))
2006 case TOK_OPEN_BRACE
:
2010 *status
= U_INVALID_FORMAT_ERROR
;
2011 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
2016 if (resType
== RESTYPE_UNKNOWN
)
2018 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
2019 We could have any of the following:
2020 { { => array (nested)
2022 { string , => string array
2026 { string :/{ => table
2027 { string } => string
2030 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
2032 if (U_FAILURE(*status
))
2037 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
2039 resType
= RESTYPE_ARRAY
;
2041 else if (token
== TOK_STRING
)
2043 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
2045 if (U_FAILURE(*status
))
2052 case TOK_COMMA
: resType
= RESTYPE_ARRAY
; break;
2053 case TOK_OPEN_BRACE
: resType
= RESTYPE_TABLE
; break;
2054 case TOK_CLOSE_BRACE
: resType
= RESTYPE_STRING
; break;
2055 case TOK_COLON
: resType
= RESTYPE_TABLE
; break;
2057 *status
= U_INVALID_FORMAT_ERROR
;
2058 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
2064 *status
= U_INVALID_FORMAT_ERROR
;
2065 error(line
, "Unexpected token after '{'");
2069 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2070 } else if(resType
== RESTYPE_TABLE_NO_FALLBACK
) {
2071 *status
= U_INVALID_FORMAT_ERROR
;
2072 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
2077 /* We should now know what we need to parse next, so call the appropriate parser
2078 function and return. */
2079 parseFunction
= gResourceTypes
[resType
].parseFunction
;
2080 if (parseFunction
!= NULL
) {
2081 return parseFunction(state
, tag
, startline
, comment
, status
);
2084 *status
= U_INTERNAL_PROGRAM_ERROR
;
2085 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
2091 /* parse the top-level resource */
2093 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, const char *filename
,
2094 UBool makeBinaryCollation
, UBool omitCollationRules
, UErrorCode
*status
)
2096 struct UString
*tokenValue
;
2097 struct UString comment
;
2099 enum EResourceType bundleType
;
2100 enum ETokenType token
;
2105 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
2107 ustr_init(&state
.lookahead
[i
].value
);
2108 ustr_init(&state
.lookahead
[i
].comment
);
2111 initLookahead(&state
, buf
, status
);
2113 state
.inputdir
= inputDir
;
2114 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
2115 state
.outputdir
= outputDir
;
2116 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
2117 state
.filename
= filename
;
2118 state
.makeBinaryCollation
= makeBinaryCollation
;
2119 state
.omitCollationRules
= omitCollationRules
;
2121 ustr_init(&comment
);
2122 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
2124 state
.bundle
= bundle_open(&comment
, FALSE
, status
);
2126 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2132 bundle_setlocale(state
.bundle
, tokenValue
->fChars
, status
);
2134 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2135 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2136 if(token
==TOK_COLON
) {
2137 *status
=U_ZERO_ERROR
;
2138 bundleType
=parseResourceType(&state
, status
);
2140 if(isTable(bundleType
))
2142 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2146 *status
=U_PARSE_ERROR
;
2147 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2153 if(token
==TOK_OPEN_BRACE
)
2155 *status
=U_ZERO_ERROR
;
2156 bundleType
=RESTYPE_TABLE
;
2160 /* neither colon nor open brace */
2161 *status
=U_PARSE_ERROR
;
2162 bundleType
=RESTYPE_UNKNOWN
;
2163 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2167 if (U_FAILURE(*status
))
2169 bundle_close(state
.bundle
, status
);
2173 if(bundleType
==RESTYPE_TABLE_NO_FALLBACK
) {
2175 * Parse a top-level table with the table(nofallback) declaration.
2176 * This is the same as a regular table, but also sets the
2177 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2179 state
.bundle
->noFallback
=TRUE
;
2181 /* top-level tables need not handle special table names like "collations" */
2182 realParseTable(&state
, state
.bundle
->fRoot
, NULL
, line
, status
);
2183 if(dependencyArray
!=NULL
){
2184 table_add(state
.bundle
->fRoot
, dependencyArray
, 0, status
);
2185 dependencyArray
= NULL
;
2187 if (U_FAILURE(*status
))
2189 bundle_close(state
.bundle
, status
);
2190 res_close(dependencyArray
);
2194 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2196 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2198 *status
= U_INVALID_FORMAT_ERROR
;
2203 cleanupLookahead(&state
);
2204 ustr_deinit(&comment
);
2205 return state
.bundle
;