1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1998-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
13 * Modification History:
15 * Date Name Description
16 * 05/26/99 stephen Creation.
17 * 02/25/00 weiv Overhaul to write udata
18 * 5/10/01 Ram removed ustdio dependency
19 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
20 *******************************************************************************
23 // Safer use of UnicodeString.
24 #ifndef UNISTR_FROM_CHAR_EXPLICIT
25 # define UNISTR_FROM_CHAR_EXPLICIT explicit
28 // Less important, but still a good idea.
29 #ifndef UNISTR_FROM_STRING_EXPLICIT
30 # define UNISTR_FROM_STRING_EXPLICIT explicit
45 #include "unicode/stringpiece.h"
46 #include "unicode/unistr.h"
47 #include "unicode/ustring.h"
48 #include "unicode/uscript.h"
49 #include "unicode/utf16.h"
50 #include "unicode/putil.h"
52 #include "collationbuilder.h"
53 #include "collationdata.h"
54 #include "collationdatareader.h"
55 #include "collationdatawriter.h"
56 #include "collationfastlatinbuilder.h"
57 #include "collationinfo.h"
58 #include "collationroot.h"
59 #include "collationruleparser.h"
60 #include "collationtailoring.h"
63 /* Number of tokens to read ahead of the current stream position */
64 #define MAX_LOOKAHEAD 3
74 #define STARTCOMMAND 0x005B
75 #define ENDCOMMAND 0x005D
76 #define OPENSQBRACKET 0x005B
77 #define CLOSESQBRACKET 0x005D
79 using icu::CharString
;
80 using icu::LocalMemory
;
81 using icu::LocalPointer
;
82 using icu::LocalUCHARBUFPointer
;
83 using icu::StringPiece
;
84 using icu::UnicodeString
;
90 struct UString comment
;
94 /* keep in sync with token defines in read.h */
95 const char *tokenNames
[TOK_TOKEN_COUNT
] =
97 "string", /* A string token, such as "MonthNames" */
98 "'{'", /* An opening brace character */
99 "'}'", /* A closing brace character */
103 "<end of file>", /* End of the file has been reached successfully */
107 /* Just to store "TRUE" */
108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
111 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
112 uint32_t lookaheadPosition
;
114 struct SRBRoot
*bundle
;
115 const char *inputdir
;
116 uint32_t inputdirLength
;
117 const char *outputdir
;
118 uint32_t outputdirLength
;
119 const char *filename
;
120 UBool makeBinaryCollation
;
121 UBool omitCollationRules
;
124 typedef struct SResource
*
125 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
127 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
129 /* The nature of the lookahead buffer:
130 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
131 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
132 When getToken is called, the current pointer is moved to the next slot and the
133 old slot is filled with the next token from the reader by calling getNextToken.
134 The token values are stored in the slot, which means that token values don't
135 survive a call to getToken, ie.
139 getToken(&value, NULL, status);
140 getToken(NULL, NULL, status); bad - value is now a different string
143 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
145 static uint32_t initTypeStrings
= 0;
148 if (!initTypeStrings
)
153 state
->lookaheadPosition
= 0;
158 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
160 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
161 if (U_FAILURE(*status
))
167 *status
= U_ZERO_ERROR
;
171 cleanupLookahead(ParseState
* state
)
174 for (i
= 0; i
<= MAX_LOOKAHEAD
; i
++)
176 ustr_deinit(&state
->lookahead
[i
].value
);
177 ustr_deinit(&state
->lookahead
[i
].comment
);
182 static enum ETokenType
183 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
185 enum ETokenType result
;
188 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
190 if (tokenValue
!= NULL
)
192 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
195 if (linenumber
!= NULL
)
197 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
202 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
205 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
206 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
207 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
208 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
209 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
211 /* printf("getToken, returning %s\n", tokenNames[result]); */
216 static enum ETokenType
217 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
219 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
221 if (U_FAILURE(*status
))
226 if (lookaheadCount
>= MAX_LOOKAHEAD
)
228 *status
= U_INTERNAL_PROGRAM_ERROR
;
232 if (tokenValue
!= NULL
)
234 *tokenValue
= &state
->lookahead
[i
].value
;
237 if (linenumber
!= NULL
)
239 *linenumber
= state
->lookahead
[i
].line
;
243 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
246 return state
->lookahead
[i
].type
;
250 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
254 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
256 if (linenumber
!= NULL
)
261 if (U_FAILURE(*status
))
266 if (token
!= expectedToken
)
268 *status
= U_INVALID_FORMAT_ERROR
;
269 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
273 *status
= U_ZERO_ERROR
;
277 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
279 struct UString
*tokenValue
;
283 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
285 if (U_FAILURE(*status
))
290 count
= u_strlen(tokenValue
->fChars
);
291 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
292 *status
= U_INVALID_FORMAT_ERROR
;
293 error(*line
, "invariant characters required for table keys, binary data, etc.");
297 result
= static_cast<char *>(uprv_malloc(count
+1));
301 *status
= U_MEMORY_ALLOCATION_ERROR
;
305 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
309 static struct SResource
*
310 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
312 struct SResource
*result
= NULL
;
313 struct UString
*tokenValue
;
314 FileStream
*file
= NULL
;
315 char filename
[256] = { '\0' };
316 char cs
[128] = { '\0' };
318 UBool quoted
= FALSE
;
319 UCHARBUF
*ucbuf
=NULL
;
321 const char* cp
= NULL
;
322 UChar
*pTarget
= NULL
;
323 UChar
*target
= NULL
;
324 UChar
*targetLimit
= NULL
;
327 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
330 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
333 if (U_FAILURE(*status
))
337 /* make the filename including the directory */
338 if (state
->inputdir
!= NULL
)
340 uprv_strcat(filename
, state
->inputdir
);
342 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
344 uprv_strcat(filename
, U_FILE_SEP_STRING
);
348 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
350 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
352 if (U_FAILURE(*status
))
356 uprv_strcat(filename
, cs
);
358 if(state
->omitCollationRules
) {
362 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
364 if (U_FAILURE(*status
)) {
365 error(line
, "An error occurred while opening the input file %s\n", filename
);
369 /* We allocate more space than actually required
370 * since the actual size needed for storing UChars
371 * is not known in UTF-8 byte stream
373 size
= ucbuf_size(ucbuf
) + 1;
374 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
375 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
377 targetLimit
= pTarget
+size
;
379 /* read the rules into the buffer */
380 while (target
< targetLimit
)
382 c
= ucbuf_getc(ucbuf
, status
);
384 quoted
= (UBool
)!quoted
;
386 /* weiv (06/26/2002): adding the following:
387 * - preserving spaces in commands [...]
388 * - # comments until the end of line
390 if (c
== STARTCOMMAND
&& !quoted
)
393 * closing bracket will be handled by the
394 * append at the end of the loop
396 while(c
!= ENDCOMMAND
) {
397 U_APPEND_CHAR32_ONLY(c
, target
);
398 c
= ucbuf_getc(ucbuf
, status
);
401 else if (c
== HASH
&& !quoted
) {
403 while(c
!= CR
&& c
!= LF
) {
404 c
= ucbuf_getc(ucbuf
, status
);
408 else if (c
== ESCAPE
)
410 c
= unescape(ucbuf
, status
);
412 if (c
== (UChar32
)U_ERR
)
415 T_FileStream_close(file
);
419 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
421 /* ignore spaces carriage returns
422 * and line feed unless in the form \uXXXX
427 /* Append UChar * after dissembling if c > 0xffff*/
428 if (c
!= (UChar32
)U_EOF
)
430 U_APPEND_CHAR32_ONLY(c
, target
);
438 /* terminate the string */
439 if(target
< targetLimit
){
443 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
448 T_FileStream_close(file
);
453 static struct SResource
*
454 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
456 struct SResource
*result
= NULL
;
457 struct UString
*tokenValue
;
458 FileStream
*file
= NULL
;
459 char filename
[256] = { '\0' };
460 char cs
[128] = { '\0' };
462 UCHARBUF
*ucbuf
=NULL
;
463 const char* cp
= NULL
;
464 UChar
*pTarget
= NULL
;
465 const UChar
*pSource
= NULL
;
468 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
471 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
474 if (U_FAILURE(*status
))
478 /* make the filename including the directory */
479 if (state
->inputdir
!= NULL
)
481 uprv_strcat(filename
, state
->inputdir
);
483 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
485 uprv_strcat(filename
, U_FILE_SEP_STRING
);
489 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
491 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
493 if (U_FAILURE(*status
))
497 uprv_strcat(filename
, cs
);
500 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
502 if (U_FAILURE(*status
)) {
503 error(line
, "An error occurred while opening the input file %s\n", filename
);
507 /* We allocate more space than actually required
508 * since the actual size needed for storing UChars
509 * is not known in UTF-8 byte stream
511 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
512 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
513 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
515 #if !UCONFIG_NO_TRANSLITERATION
516 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
519 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
521 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
525 T_FileStream_close(file
);
529 static ArrayResource
* dependencyArray
= NULL
;
531 static struct SResource
*
532 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
534 struct SResource
*result
= NULL
;
535 struct SResource
*elem
= NULL
;
536 struct UString
*tokenValue
;
538 char filename
[256] = { '\0' };
539 char cs
[128] = { '\0' };
541 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
544 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
547 if (U_FAILURE(*status
))
551 /* make the filename including the directory */
552 if (state
->outputdir
!= NULL
)
554 uprv_strcat(filename
, state
->outputdir
);
556 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
558 uprv_strcat(filename
, U_FILE_SEP_STRING
);
562 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
564 if (U_FAILURE(*status
))
568 uprv_strcat(filename
, cs
);
569 if(!T_FileStream_file_exists(filename
)){
571 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
573 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
576 if(dependencyArray
==NULL
){
577 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
580 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
582 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
584 dependencyArray
->add(elem
);
586 if (U_FAILURE(*status
))
590 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
593 static struct SResource
*
594 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
596 struct UString
*tokenValue
;
597 struct SResource
*result
= NULL
;
599 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
601 return parseUCARules(tag, startline, status);
604 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
606 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
608 if (U_SUCCESS(*status
))
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
613 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
614 if(U_SUCCESS(*status
) && result
) {
615 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
617 if (U_FAILURE(*status
))
628 static struct SResource
*
629 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
631 struct UString
*tokenValue
;
632 struct SResource
*result
= NULL
;
634 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
637 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
640 if (U_SUCCESS(*status
))
642 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
643 doesn't survive expect either) */
645 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
647 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
649 if (U_FAILURE(*status
))
659 #if !UCONFIG_NO_COLLATION
663 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
664 if (res
== res_none() || !res
->isTable()) {
668 TableResource
*list
= static_cast<TableResource
*>(res
);
669 SResource
*current
= list
->fFirst
;
670 while (current
!= NULL
) {
671 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
674 current
= current
->fNext
;
679 class GenrbImporter
: public icu::CollationRuleParser::Importer
{
681 GenrbImporter(const char *in
, const char *out
) : inputDir(in
), outputDir(out
) {}
682 virtual ~GenrbImporter();
683 virtual void getRules(
684 const char *localeID
, const char *collationType
,
685 UnicodeString
&rules
,
686 const char *&errorReason
, UErrorCode
&errorCode
);
689 const char *inputDir
;
690 const char *outputDir
;
693 GenrbImporter::~GenrbImporter() {}
696 GenrbImporter::getRules(
697 const char *localeID
, const char *collationType
,
698 UnicodeString
&rules
,
699 const char *& /*errorReason*/, UErrorCode
&errorCode
) {
700 CharString
filename(localeID
, errorCode
);
701 for(int32_t i
= 0; i
< filename
.length(); i
++){
702 if(filename
[i
] == '-'){
703 filename
.data()[i
] = '_';
706 filename
.append(".txt", errorCode
);
707 if (U_FAILURE(errorCode
)) {
710 CharString inputDirBuf
;
711 CharString openFileName
;
712 if(inputDir
== NULL
) {
713 const char *filenameBegin
= uprv_strrchr(filename
.data(), U_FILE_SEP_CHAR
);
714 if (filenameBegin
!= NULL
) {
716 * When a filename ../../../data/root.txt is specified,
717 * we presume that the input directory is ../../../data
718 * This is very important when the resource file includes
719 * another file, like UCARules.txt or thaidict.brk.
721 StringPiece dir
= filename
.toStringPiece();
722 const char *filenameLimit
= filename
.data() + filename
.length();
723 dir
.remove_suffix((int32_t)(filenameLimit
- filenameBegin
));
724 inputDirBuf
.append(dir
, errorCode
);
725 inputDir
= inputDirBuf
.data();
728 int32_t dirlen
= (int32_t)uprv_strlen(inputDir
);
730 if((filename
[0] != U_FILE_SEP_CHAR
) && (inputDir
[dirlen
-1] !='.')) {
732 * append the input dir to openFileName if the first char in
733 * filename is not file separator char and the last char input directory is not '.'.
734 * This is to support :
735 * genrb -s. /home/icu/data
737 * The user cannot mix notations like
738 * genrb -s. /icu/data --- the absolute path specified. -s redundant
740 * genrb -s. icu/data --- start from CWD and look in icu/data dir
742 openFileName
.append(inputDir
, dirlen
, errorCode
);
743 if(inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
744 openFileName
.append(U_FILE_SEP_CHAR
, errorCode
);
748 openFileName
.append(filename
, errorCode
);
749 if(U_FAILURE(errorCode
)) {
752 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
754 LocalUCHARBUFPointer
ucbuf(
755 ucbuf_open(openFileName
.data(), &cp
, getShowWarning(), TRUE
, &errorCode
));
756 if(errorCode
== U_FILE_ACCESS_ERROR
) {
757 fprintf(stderr
, "couldn't open file %s\n", openFileName
.data());
760 if (ucbuf
.isNull() || U_FAILURE(errorCode
)) {
761 fprintf(stderr
, "An error occurred processing file %s. Error: %s\n", openFileName
.data(), u_errorName(errorCode
));
765 /* Parse the data into an SRBRoot */
766 LocalPointer
<SRBRoot
> data(
767 parse(ucbuf
.getAlias(), inputDir
, outputDir
, filename
.data(), FALSE
, FALSE
, &errorCode
));
768 if (U_FAILURE(errorCode
)) {
772 struct SResource
*root
= data
->fRoot
;
773 struct SResource
*collations
= resLookup(root
, "collations");
774 if (collations
!= NULL
) {
775 struct SResource
*collation
= resLookup(collations
, collationType
);
776 if (collation
!= NULL
) {
777 struct SResource
*sequence
= resLookup(collation
, "Sequence");
778 if (sequence
!= NULL
&& sequence
->isString()) {
779 // No string pointer aliasing so that we need not hold onto the resource bundle.
780 StringResource
*sr
= static_cast<StringResource
*>(sequence
);
787 // Quick-and-dirty escaping function.
788 // Assumes that we are on an ASCII-based platform.
790 escape(const UChar
*s
, char *buffer
) {
791 int32_t length
= u_strlen(s
);
795 U16_NEXT(s
, i
, length
, c
);
799 } else if (0x20 <= c
&& c
<= 0x7e) {
801 *buffer
++ = (char)c
; // assumes ASCII-based platform
803 buffer
+= sprintf(buffer
, "\\u%04X", (int)c
);
810 #endif // !UCONFIG_NO_COLLATION
812 static TableResource
*
813 addCollation(ParseState
* state
, TableResource
*result
, const char *collationType
,
814 uint32_t startline
, UErrorCode
*status
)
816 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
817 struct SResource
*member
= NULL
;
818 struct UString
*tokenValue
;
819 struct UString comment
;
820 enum ETokenType token
;
823 UBool haveRules
= FALSE
;
824 UVersionInfo version
;
827 /* '{' . (name resource)* '}' */
828 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
833 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
835 if (token
== TOK_CLOSE_BRACE
)
840 if (token
!= TOK_STRING
)
843 *status
= U_INVALID_FORMAT_ERROR
;
845 if (token
== TOK_EOF
)
847 error(startline
, "unterminated table");
851 error(line
, "Unexpected token %s", tokenNames
[token
]);
857 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
859 if (U_FAILURE(*status
))
865 member
= parseResource(state
, subtag
, NULL
, status
);
867 if (U_FAILURE(*status
))
874 // Ignore the parsed resources, continue parsing.
876 else if (uprv_strcmp(subtag
, "Version") == 0 && member
->isString())
878 StringResource
*sr
= static_cast<StringResource
*>(member
);
880 int32_t length
= sr
->length();
882 if (length
>= UPRV_LENGTHOF(ver
))
884 length
= UPRV_LENGTHOF(ver
) - 1;
887 sr
->fString
.extract(0, length
, ver
, UPRV_LENGTHOF(ver
), US_INV
);
888 u_versionFromString(version
, ver
);
890 result
->add(member
, line
, *status
);
893 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
895 /* discard duplicate %%CollationBin if any*/
897 else if (uprv_strcmp(subtag
, "Sequence") == 0 && member
->isString())
899 StringResource
*sr
= static_cast<StringResource
*>(member
);
902 // Defer building the collator until we have seen
903 // all sub-elements of the collation table, including the Version.
904 /* in order to achieve smaller data files, we can direct genrb */
905 /* to omit collation rules */
906 if(!state
->omitCollationRules
) {
907 result
->add(member
, line
, *status
);
911 else // Just copy non-special items.
913 result
->add(member
, line
, *status
);
916 res_close(member
); // TODO: use LocalPointer
917 if (U_FAILURE(*status
))
924 if (!haveRules
) { return result
; }
926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
927 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
930 // CLDR ticket #3949, ICU ticket #8082:
931 // Do not build collation binary data for for-import-only "private" collation rule strings.
932 if (uprv_strncmp(collationType
, "private-", 8) == 0) {
934 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
939 if(!state
->makeBinaryCollation
) {
941 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
945 UErrorCode intStatus
= U_ZERO_ERROR
;
946 UParseError parseError
;
947 uprv_memset(&parseError
, 0, sizeof(parseError
));
948 GenrbImporter
importer(state
->inputdir
, state
->outputdir
);
949 const icu::CollationTailoring
*base
= icu::CollationRoot::getRoot(intStatus
);
950 if(U_FAILURE(intStatus
)) {
951 error(line
, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus
));
953 return NULL
; // TODO: use LocalUResourceBundlePointer for result
955 icu::CollationBuilder
builder(base
, intStatus
);
956 if(uprv_strncmp(collationType
, "search", 6) == 0) {
957 builder
.disableFastLatin(); // build fast-Latin table unless search collator
959 LocalPointer
<icu::CollationTailoring
> t(
960 builder
.parseAndBuild(rules
, version
, &importer
, &parseError
, intStatus
));
961 if(U_FAILURE(intStatus
)) {
962 const char *reason
= builder
.getErrorReason();
963 if(reason
== NULL
) { reason
= ""; }
964 error(line
, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
965 state
->filename
, collationType
,
966 (long)parseError
.offset
, u_errorName(intStatus
), reason
);
967 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
968 // Print pre- and post-context.
969 char preBuffer
[100], postBuffer
[100];
970 escape(parseError
.preContext
, preBuffer
);
971 escape(parseError
.postContext
, postBuffer
);
972 error(line
, " error context: \"...%s\" ! \"%s...\"", preBuffer
, postBuffer
);
974 if(isStrict() || t
.isNull()) {
980 icu::LocalMemory
<uint8_t> buffer
;
981 int32_t capacity
= 100000;
982 uint8_t *dest
= buffer
.allocateInsteadAndCopy(capacity
);
984 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
986 *status
= U_MEMORY_ALLOCATION_ERROR
;
990 int32_t indexes
[icu::CollationDataReader::IX_TOTAL_SIZE
+ 1];
991 int32_t totalSize
= icu::CollationDataWriter::writeTailoring(
992 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
993 if(intStatus
== U_BUFFER_OVERFLOW_ERROR
) {
994 intStatus
= U_ZERO_ERROR
;
995 capacity
= totalSize
;
996 dest
= buffer
.allocateInsteadAndCopy(capacity
);
998 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
1000 *status
= U_MEMORY_ALLOCATION_ERROR
;
1004 totalSize
= icu::CollationDataWriter::writeTailoring(
1005 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
1007 if(U_FAILURE(intStatus
)) {
1008 fprintf(stderr
, "CollationDataWriter::writeTailoring() failed: %s\n",
1009 u_errorName(intStatus
));
1014 printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
);
1015 icu::CollationInfo::printSizes(totalSize
, indexes
);
1016 if(t
->settings
->hasReordering()) {
1017 printf("%s~%s collation reordering ranges:\n", state
->filename
, collationType
);
1018 icu::CollationInfo::printReorderRanges(
1019 *t
->data
, t
->settings
->reorderCodes
, t
->settings
->reorderCodesLength
);
1022 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", totalSize
, dest
, NULL
, NULL
, status
);
1023 result
->add(collationBin
, line
, *status
);
1024 if (U_FAILURE(*status
)) {
1033 keepCollationType(const char * /*type*/) {
1037 static struct SResource
*
1038 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1040 TableResource
*result
= NULL
;
1041 struct SResource
*member
= NULL
;
1042 struct UString
*tokenValue
;
1043 struct UString comment
;
1044 enum ETokenType token
;
1045 char subtag
[1024], typeKeyword
[1024];
1048 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1050 if (result
== NULL
|| U_FAILURE(*status
))
1055 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1058 return addCollation(state
, result
, "(no type)", startline
, status
);
1062 ustr_init(&comment
);
1063 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1065 if (token
== TOK_CLOSE_BRACE
)
1070 if (token
!= TOK_STRING
)
1073 *status
= U_INVALID_FORMAT_ERROR
;
1075 if (token
== TOK_EOF
)
1077 error(startline
, "unterminated table");
1081 error(line
, "Unexpected token %s", tokenNames
[token
]);
1087 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1089 if (U_FAILURE(*status
))
1095 if (uprv_strcmp(subtag
, "default") == 0)
1097 member
= parseResource(state
, subtag
, NULL
, status
);
1099 if (U_FAILURE(*status
))
1105 result
->add(member
, line
, *status
);
1109 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1110 /* this probably needs to be refactored or recursively use the parser */
1111 /* first we assume that our collation table won't have the explicit type */
1112 /* then, we cannot handle aliases */
1113 if(token
== TOK_OPEN_BRACE
) {
1114 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1115 TableResource
*collationRes
;
1116 if (keepCollationType(subtag
)) {
1117 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1119 collationRes
= NULL
;
1121 // need to parse the collation data regardless
1122 collationRes
= addCollation(state
, collationRes
, subtag
, startline
, status
);
1123 if (collationRes
!= NULL
) {
1124 result
->add(collationRes
, startline
, *status
);
1126 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1127 /* we could have a table too */
1128 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1129 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1130 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1131 member
= parseResource(state
, subtag
, NULL
, status
);
1132 if (U_FAILURE(*status
))
1138 result
->add(member
, line
, *status
);
1141 *status
= U_INVALID_FORMAT_ERROR
;
1146 *status
= U_INVALID_FORMAT_ERROR
;
1151 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1153 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1155 if (U_FAILURE(*status
))
1164 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1165 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1166 static struct SResource
*
1167 realParseTable(ParseState
* state
, TableResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1169 struct SResource
*member
= NULL
;
1170 struct UString
*tokenValue
=NULL
;
1171 struct UString comment
;
1172 enum ETokenType token
;
1175 UBool readToken
= FALSE
;
1177 /* '{' . (name resource)* '}' */
1180 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1184 ustr_init(&comment
);
1185 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1187 if (token
== TOK_CLOSE_BRACE
)
1190 warning(startline
, "Encountered empty table");
1195 if (token
!= TOK_STRING
)
1197 *status
= U_INVALID_FORMAT_ERROR
;
1199 if (token
== TOK_EOF
)
1201 error(startline
, "unterminated table");
1205 error(line
, "unexpected token %s", tokenNames
[token
]);
1211 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
1212 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1214 *status
= U_INVALID_FORMAT_ERROR
;
1215 error(line
, "invariant characters required for table keys");
1219 if (U_FAILURE(*status
))
1221 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
1225 member
= parseResource(state
, subtag
, &comment
, status
);
1227 if (member
== NULL
|| U_FAILURE(*status
))
1229 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1233 table
->add(member
, line
, *status
);
1235 if (U_FAILURE(*status
))
1237 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1241 ustr_deinit(&comment
);
1245 /* A compiler warning will appear if all paths don't contain a return statement. */
1246 /* *status = U_INTERNAL_PROGRAM_ERROR;
1250 static struct SResource
*
1251 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1253 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1255 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1257 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1259 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1262 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1265 TableResource
*result
= table_open(state
->bundle
, tag
, comment
, status
);
1267 if (result
== NULL
|| U_FAILURE(*status
))
1271 return realParseTable(state
, result
, tag
, startline
, status
);
1274 static struct SResource
*
1275 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1277 struct SResource
*member
= NULL
;
1278 struct UString
*tokenValue
;
1279 struct UString memberComments
;
1280 enum ETokenType token
;
1281 UBool readToken
= FALSE
;
1283 ArrayResource
*result
= array_open(state
->bundle
, tag
, comment
, status
);
1285 if (result
== NULL
|| U_FAILURE(*status
))
1290 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1293 ustr_init(&memberComments
);
1295 /* '{' . resource [','] '}' */
1299 ustr_setlen(&memberComments
, 0, status
);
1301 /* check for end of array, but don't consume next token unless it really is the end */
1302 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1305 if (token
== TOK_CLOSE_BRACE
)
1307 getToken(state
, NULL
, NULL
, NULL
, status
);
1309 warning(startline
, "Encountered empty array");
1314 if (token
== TOK_EOF
)
1317 *status
= U_INVALID_FORMAT_ERROR
;
1318 error(startline
, "unterminated array");
1322 /* string arrays are a special case */
1323 if (token
== TOK_STRING
)
1325 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
1326 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1330 member
= parseResource(state
, NULL
, &memberComments
, status
);
1333 if (member
== NULL
|| U_FAILURE(*status
))
1339 result
->add(member
);
1341 /* eat optional comma if present */
1342 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1344 if (token
== TOK_COMMA
)
1346 getToken(state
, NULL
, NULL
, NULL
, status
);
1349 if (U_FAILURE(*status
))
1357 ustr_deinit(&memberComments
);
1361 static struct SResource
*
1362 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1364 enum ETokenType token
;
1367 UBool readToken
= FALSE
;
1370 struct UString memberComments
;
1372 IntVectorResource
*result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1374 if (result
== NULL
|| U_FAILURE(*status
))
1380 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1382 ustr_init(&memberComments
);
1383 /* '{' . string [','] '}' */
1386 ustr_setlen(&memberComments
, 0, status
);
1388 /* check for end of array, but don't consume next token unless it really is the end */
1389 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1391 if (token
== TOK_CLOSE_BRACE
)
1393 /* it's the end, consume the close brace */
1394 getToken(state
, NULL
, NULL
, NULL
, status
);
1396 warning(startline
, "Encountered empty int vector");
1398 ustr_deinit(&memberComments
);
1402 string
= getInvariantString(state
, NULL
, NULL
, status
);
1404 if (U_FAILURE(*status
))
1410 /* For handling illegal char in the Intvector */
1411 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1412 len
=(uint32_t)(stopstring
-string
);
1414 if(len
==uprv_strlen(string
))
1416 result
->add(value
, *status
);
1418 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1423 *status
=U_INVALID_CHAR_FOUND
;
1426 if (U_FAILURE(*status
))
1432 /* the comma is optional (even though it is required to prevent the reader from concatenating
1433 consecutive entries) so that a missing comma on the last entry isn't an error */
1434 if (token
== TOK_COMMA
)
1436 getToken(state
, NULL
, NULL
, NULL
, status
);
1442 /* A compiler warning will appear if all paths don't contain a return statement. */
1443 /* intvector_close(result, status);
1444 *status = U_INTERNAL_PROGRAM_ERROR;
1448 static struct SResource
*
1449 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1452 LocalMemory
<char> string(getInvariantString(state
, &line
, NULL
, status
));
1453 if (string
.isNull() || U_FAILURE(*status
))
1458 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1459 if (U_FAILURE(*status
))
1465 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1468 uint32_t count
= (uint32_t)uprv_strlen(string
.getAlias());
1471 LocalMemory
<uint8_t> value
;
1472 if (value
.allocateInsteadAndCopy(count
) == NULL
)
1474 *status
= U_MEMORY_ALLOCATION_ERROR
;
1478 char toConv
[3] = {'\0', '\0', '\0'};
1479 for (uint32_t i
= 0; i
< count
; i
+= 2)
1481 toConv
[0] = string
[i
];
1482 toConv
[1] = string
[i
+ 1];
1485 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1486 uint32_t len
=(uint32_t)(stopstring
-toConv
);
1490 *status
=U_INVALID_CHAR_FOUND
;
1495 return bin_open(state
->bundle
, tag
, count
>> 1, value
.getAlias(), NULL
, comment
, status
);
1499 *status
= U_INVALID_CHAR_FOUND
;
1500 error(line
, "Encountered invalid binary value (length is odd)");
1506 warning(startline
, "Encountered empty binary value");
1507 return bin_open(state
->bundle
, tag
, 0, NULL
, "", comment
, status
);
1511 static struct SResource
*
1512 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1514 struct SResource
*result
= NULL
;
1520 string
= getInvariantString(state
, NULL
, NULL
, status
);
1522 if (string
== NULL
|| U_FAILURE(*status
))
1527 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1529 if (U_FAILURE(*status
))
1536 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1539 if (uprv_strlen(string
) <= 0)
1541 warning(startline
, "Encountered empty integer. Default value is 0.");
1544 /* Allow integer support for hexdecimal, octal digit and decimal*/
1545 /* and handle illegal char in the integer*/
1546 value
= uprv_strtoul(string
, &stopstring
, 0);
1547 len
=(uint32_t)(stopstring
-string
);
1548 if(len
==uprv_strlen(string
))
1550 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1554 *status
=U_INVALID_CHAR_FOUND
;
1561 static struct SResource
*
1562 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1565 LocalMemory
<char> filename(getInvariantString(state
, &line
, NULL
, status
));
1566 if (U_FAILURE(*status
))
1571 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1573 if (U_FAILURE(*status
))
1579 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1582 /* Open the input file for reading */
1583 CharString fullname
;
1584 if (state
->inputdir
!= NULL
) {
1585 fullname
.append(state
->inputdir
, *status
);
1587 fullname
.appendPathPart(filename
.getAlias(), *status
);
1588 if (U_FAILURE(*status
)) {
1592 FileStream
*file
= T_FileStream_open(fullname
.data(), "rb");
1595 error(line
, "couldn't open input file %s", filename
.getAlias());
1596 *status
= U_FILE_ACCESS_ERROR
;
1600 int32_t len
= T_FileStream_size(file
);
1601 LocalMemory
<uint8_t> data
;
1602 if(data
.allocateInsteadAndCopy(len
) == NULL
)
1604 *status
= U_MEMORY_ALLOCATION_ERROR
;
1605 T_FileStream_close (file
);
1609 /* int32_t numRead = */ T_FileStream_read(file
, data
.getAlias(), len
);
1610 T_FileStream_close (file
);
1612 return bin_open(state
->bundle
, tag
, len
, data
.getAlias(), fullname
.data(), comment
, status
);
1615 static struct SResource
*
1616 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1618 struct SResource
*result
;
1622 UChar
*pTarget
= NULL
;
1625 char *fullname
= NULL
;
1627 const char* cp
= NULL
;
1628 const UChar
* uBuffer
= NULL
;
1630 filename
= getInvariantString(state
, &line
, NULL
, status
);
1631 count
= (int32_t)uprv_strlen(filename
);
1633 if (U_FAILURE(*status
))
1638 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1640 if (U_FAILURE(*status
))
1642 uprv_free(filename
);
1647 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1650 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1652 if(fullname
== NULL
)
1654 *status
= U_MEMORY_ALLOCATION_ERROR
;
1655 uprv_free(filename
);
1659 if(state
->inputdir
!=NULL
){
1660 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1663 uprv_strcpy(fullname
, state
->inputdir
);
1665 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1666 fullname
[state
->inputdirLength
+ 1] = '\0';
1668 uprv_strcat(fullname
, filename
);
1672 uprv_strcpy(fullname
, state
->inputdir
);
1673 uprv_strcat(fullname
, filename
);
1676 uprv_strcpy(fullname
,filename
);
1679 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1681 if (U_FAILURE(*status
)) {
1682 error(line
, "couldn't open input file %s\n", filename
);
1686 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1687 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
1693 uprv_free(filename
);
1694 uprv_free(fullname
);
/*
 * UChar string constants for the resource type names that may follow ':' in
 * a resource declaration. Each U_STRING_DECL here is paired with a
 * U_STRING_INIT call that repeats the same literal and length.
 * k_type_bin and k_type_int are short aliases that parseResourceType() maps
 * to the binary and integer types.
 */
1703 U_STRING_DECL(k_type_string
, "string", 6);
1704 U_STRING_DECL(k_type_binary
, "binary", 6);
1705 U_STRING_DECL(k_type_bin
, "bin", 3);
1706 U_STRING_DECL(k_type_table
, "table", 5);
1707 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1708 U_STRING_DECL(k_type_int
, "int", 3);
1709 U_STRING_DECL(k_type_integer
, "integer", 7);
1710 U_STRING_DECL(k_type_array
, "array", 5);
1711 U_STRING_DECL(k_type_alias
, "alias", 5);
1712 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1713 U_STRING_DECL(k_type_import
, "import", 6);
1714 U_STRING_DECL(k_type_include
, "include", 7);
1716 /* Various non-standard processing plugins that create one or more special resources. */
1717 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1718 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1719 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1720 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/*
 * Resource type identifiers. The values are used directly as indexes into
 * gResourceTypes[], and parseResourceType() walks them from the one after
 * RESTYPE_UNKNOWN up to (excluding) RESTYPE_RESERVED.
 * NOTE(review): only part of the enumerator list is visible in this excerpt;
 * the order presumably matches the rows of gResourceTypes[] - confirm.
 */
1722 typedef enum EResourceType
1728 RESTYPE_TABLE_NO_FALLBACK
,
1735 RESTYPE_PROCESS_UCA_RULES
,
1736 RESTYPE_PROCESS_COLLATION
,
1737 RESTYPE_PROCESS_TRANSLITERATOR
,
1738 RESTYPE_PROCESS_DEPENDENCY
,
/*
 * Table of the known resource types, indexed by EResourceType.
 * nameUChars is the name compared against the type token in
 * parseResourceType(); parseFunction is the parser that parseResource()
 * dispatches to. A NULL parseFunction makes parseResource() report an
 * internal error for that type.
 */
1743 const char *nameChars
; /* only used for debugging */
1744 const UChar
*nameUChars
;
1745 ParseResourceFunction
*parseFunction
;
1746 } gResourceTypes
[] = {
1747 {"Unknown", NULL
, NULL
},
1748 {"string", k_type_string
, parseString
},
1749 {"binary", k_type_binary
, parseBinary
},
1750 {"table", k_type_table
, parseTable
},
1751 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1752 {"integer", k_type_integer
, parseInteger
},
1753 {"array", k_type_array
, parseArray
},
1754 {"alias", k_type_alias
, parseAlias
},
1755 {"intvector", k_type_intvector
, parseIntVector
},
1756 {"import", k_type_import
, parseImport
},
1757 {"include", k_type_include
, parseInclude
},
1758 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1759 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1760 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1761 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
/* Sentinel row for RESTYPE_RESERVED; it has no name to match and no parser. */
1762 {"reserved", NULL
, NULL
}
/*
 * Initialization of the k_type_* constants, one U_STRING_INIT per
 * U_STRING_DECL with the same literal and length.
 * NOTE(review): the header of the enclosing function is not visible in this
 * excerpt; presumably this must run before parseResourceType() compares
 * against these constants - confirm.
 */
1767 U_STRING_INIT(k_type_string
, "string", 6);
1768 U_STRING_INIT(k_type_binary
, "binary", 6);
1769 U_STRING_INIT(k_type_bin
, "bin", 3);
1770 U_STRING_INIT(k_type_table
, "table", 5);
1771 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1772 U_STRING_INIT(k_type_int
, "int", 3);
1773 U_STRING_INIT(k_type_integer
, "integer", 7);
1774 U_STRING_INIT(k_type_array
, "array", 5);
1775 U_STRING_INIT(k_type_alias
, "alias", 5);
1776 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1777 U_STRING_INIT(k_type_import
, "import", 6);
1778 U_STRING_INIT(k_type_include
, "include", 7);
/* Names of the non-standard processing plugins. */
1780 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1781 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1782 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1783 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
1786 static inline UBool
isTable(enum EResourceType type
) {
1787 return (UBool
)(type
==RESTYPE_TABLE
|| type
==RESTYPE_TABLE_NO_FALLBACK
);
1790 static enum EResourceType
1791 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1793 struct UString
*tokenValue
;
1794 struct UString comment
;
1795 enum EResourceType result
= RESTYPE_UNKNOWN
;
1797 ustr_init(&comment
);
1798 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1800 if (U_FAILURE(*status
))
1802 return RESTYPE_UNKNOWN
;
1805 *status
= U_ZERO_ERROR
;
1807 /* Search for normal types */
1808 result
=RESTYPE_UNKNOWN
;
1809 while ((result
=(EResourceType
)(result
+1)) < RESTYPE_RESERVED
) {
1810 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1814 /* Now search for the aliases */
1815 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1816 result
= RESTYPE_INTEGER
;
1818 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1819 result
= RESTYPE_BINARY
;
1821 else if (result
== RESTYPE_RESERVED
) {
1822 char tokenBuffer
[1024];
1823 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
1824 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1825 *status
= U_INVALID_FORMAT_ERROR
;
1826 error(line
, "unknown resource type '%s'", tokenBuffer
);
1832 /* parse a non-top-level resource */
1833 static struct SResource
*
1834 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1836 enum ETokenType token
;
1837 enum EResourceType resType
= RESTYPE_UNKNOWN
;
1838 ParseResourceFunction
*parseFunction
= NULL
;
1839 struct UString
*tokenValue
;
1844 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1847 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1850 /* name . [ ':' type ] '{' resource '}' */
1851 /* This function parses from the colon onwards. If the colon is present, parse the
1852 type then try to parse a resource of that type. If there is no explicit type,
1853 work it out using the lookahead tokens. */
1857 *status
= U_INVALID_FORMAT_ERROR
;
1858 error(startline
, "Unexpected EOF encountered");
1862 *status
= U_INVALID_FORMAT_ERROR
;
1866 resType
= parseResourceType(state
, status
);
1867 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1869 if (U_FAILURE(*status
))
1876 case TOK_OPEN_BRACE
:
1880 *status
= U_INVALID_FORMAT_ERROR
;
1881 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
1886 if (resType
== RESTYPE_UNKNOWN
)
1888 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1889 We could have any of the following:
1890 { { => array (nested)
1892 { string , => string array
1896 { string :/{ => table
1897 { string } => string
1900 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
1902 if (U_FAILURE(*status
))
1907 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
1909 resType
= RESTYPE_ARRAY
;
1911 else if (token
== TOK_STRING
)
1913 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
1915 if (U_FAILURE(*status
))
1922 case TOK_COMMA
: resType
= RESTYPE_ARRAY
; break;
1923 case TOK_OPEN_BRACE
: resType
= RESTYPE_TABLE
; break;
1924 case TOK_CLOSE_BRACE
: resType
= RESTYPE_STRING
; break;
1925 case TOK_COLON
: resType
= RESTYPE_TABLE
; break;
1927 *status
= U_INVALID_FORMAT_ERROR
;
1928 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
1934 *status
= U_INVALID_FORMAT_ERROR
;
1935 error(line
, "Unexpected token after '{'");
1939 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1940 } else if(resType
== RESTYPE_TABLE_NO_FALLBACK
) {
1941 *status
= U_INVALID_FORMAT_ERROR
;
1942 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
1947 /* We should now know what we need to parse next, so call the appropriate parser
1948 function and return. */
1949 parseFunction
= gResourceTypes
[resType
].parseFunction
;
1950 if (parseFunction
!= NULL
) {
1951 return parseFunction(state
, tag
, startline
, comment
, status
);
1954 *status
= U_INTERNAL_PROGRAM_ERROR
;
1955 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
1961 /* parse the top-level resource */
1963 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, const char *filename
,
1964 UBool makeBinaryCollation
, UBool omitCollationRules
, UErrorCode
*status
)
1966 struct UString
*tokenValue
;
1967 struct UString comment
;
1969 enum EResourceType bundleType
;
1970 enum ETokenType token
;
1975 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
1977 ustr_init(&state
.lookahead
[i
].value
);
1978 ustr_init(&state
.lookahead
[i
].comment
);
1981 initLookahead(&state
, buf
, status
);
1983 state
.inputdir
= inputDir
;
1984 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
1985 state
.outputdir
= outputDir
;
1986 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
1987 state
.filename
= filename
;
1988 state
.makeBinaryCollation
= makeBinaryCollation
;
1989 state
.omitCollationRules
= omitCollationRules
;
1991 ustr_init(&comment
);
1992 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
1994 state
.bundle
= new SRBRoot(&comment
, FALSE
, *status
);
1996 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2002 state
.bundle
->setLocale(tokenValue
->fChars
, *status
);
2004 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2005 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2006 if(token
==TOK_COLON
) {
2007 *status
=U_ZERO_ERROR
;
2008 bundleType
=parseResourceType(&state
, status
);
2010 if(isTable(bundleType
))
2012 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2016 *status
=U_PARSE_ERROR
;
2017 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2023 if(token
==TOK_OPEN_BRACE
)
2025 *status
=U_ZERO_ERROR
;
2026 bundleType
=RESTYPE_TABLE
;
2030 /* neither colon nor open brace */
2031 *status
=U_PARSE_ERROR
;
2032 bundleType
=RESTYPE_UNKNOWN
;
2033 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2037 if (U_FAILURE(*status
))
2039 delete state
.bundle
;
2043 if(bundleType
==RESTYPE_TABLE_NO_FALLBACK
) {
2045 * Parse a top-level table with the table(nofallback) declaration.
2046 * This is the same as a regular table, but also sets the
2047 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2049 state
.bundle
->fNoFallback
=TRUE
;
2051 /* top-level tables need not handle special table names like "collations" */
2052 assert(!state
.bundle
->fIsPoolBundle
);
2053 assert(state
.bundle
->fRoot
->fType
== URES_TABLE
);
2054 TableResource
*rootTable
= static_cast<TableResource
*>(state
.bundle
->fRoot
);
2055 realParseTable(&state
, rootTable
, NULL
, line
, status
);
2056 if(dependencyArray
!=NULL
){
2057 rootTable
->add(dependencyArray
, 0, *status
);
2058 dependencyArray
= NULL
;
2060 if (U_FAILURE(*status
))
2062 delete state
.bundle
;
2063 res_close(dependencyArray
);
2067 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2069 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2071 *status
= U_INVALID_FORMAT_ERROR
;
2076 cleanupLookahead(&state
);
2077 ustr_deinit(&comment
);
2078 return state
.bundle
;