2 *******************************************************************************
4 * Copyright (C) 1998-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 # define UNISTR_FROM_CHAR_EXPLICIT explicit
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 # define UNISTR_FROM_STRING_EXPLICIT explicit
43 #include "unicode/stringpiece.h"
44 #include "unicode/unistr.h"
45 #include "unicode/ustring.h"
46 #include "unicode/uscript.h"
47 #include "unicode/utf16.h"
48 #include "unicode/putil.h"
50 #include "collationbuilder.h"
51 #include "collationdata.h"
52 #include "collationdatareader.h"
53 #include "collationdatawriter.h"
54 #include "collationfastlatinbuilder.h"
55 #include "collationinfo.h"
56 #include "collationroot.h"
57 #include "collationruleparser.h"
58 #include "collationtailoring.h"
61 /* Number of tokens to read ahead of the current stream position */
62 #define MAX_LOOKAHEAD 3
72 #define STARTCOMMAND 0x005B
73 #define ENDCOMMAND 0x005D
74 #define OPENSQBRACKET 0x005B
75 #define CLOSESQBRACKET 0x005D
77 using icu::CharString
;
78 using icu::LocalMemory
;
79 using icu::LocalPointer
;
80 using icu::LocalUCHARBUFPointer
;
81 using icu::StringPiece
;
82 using icu::UnicodeString
;
88 struct UString comment
;
92 /* keep in sync with token defines in read.h */
93 const char *tokenNames
[TOK_TOKEN_COUNT
] =
95 "string", /* A string token, such as "MonthNames" */
96 "'{'", /* An opening brace character */
97 "'}'", /* A closing brace character */
101 "<end of file>", /* End of the file has been reached successfully */
105 /* Just to store "TRUE" */
106 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
109 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
110 uint32_t lookaheadPosition
;
112 struct SRBRoot
*bundle
;
113 const char *inputdir
;
114 uint32_t inputdirLength
;
115 const char *outputdir
;
116 uint32_t outputdirLength
;
117 const char *filename
;
118 UBool makeBinaryCollation
;
119 UBool omitCollationRules
;
122 typedef struct SResource
*
123 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
125 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
   MAX_LOOKAHEAD lookahead tokens plus one slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is refilled with the next token from the reader by calling getNextToken.
   The token values are stored in the slots, which means that a token value does
   not survive a later call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);   bad - value may now be a different string
*/
141 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
143 static uint32_t initTypeStrings
= 0;
146 if (!initTypeStrings
)
151 state
->lookaheadPosition
= 0;
156 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
158 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
159 if (U_FAILURE(*status
))
165 *status
= U_ZERO_ERROR
;
169 cleanupLookahead(ParseState
* state
)
172 for (i
= 0; i
<= MAX_LOOKAHEAD
; i
++)
174 ustr_deinit(&state
->lookahead
[i
].value
);
175 ustr_deinit(&state
->lookahead
[i
].comment
);
180 static enum ETokenType
181 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
183 enum ETokenType result
;
186 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
188 if (tokenValue
!= NULL
)
190 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
193 if (linenumber
!= NULL
)
195 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
200 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
203 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
204 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
205 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
206 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
207 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
209 /* printf("getToken, returning %s\n", tokenNames[result]); */
214 static enum ETokenType
215 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
217 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
219 if (U_FAILURE(*status
))
224 if (lookaheadCount
>= MAX_LOOKAHEAD
)
226 *status
= U_INTERNAL_PROGRAM_ERROR
;
230 if (tokenValue
!= NULL
)
232 *tokenValue
= &state
->lookahead
[i
].value
;
235 if (linenumber
!= NULL
)
237 *linenumber
= state
->lookahead
[i
].line
;
241 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
244 return state
->lookahead
[i
].type
;
248 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
252 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
254 if (linenumber
!= NULL
)
259 if (U_FAILURE(*status
))
264 if (token
!= expectedToken
)
266 *status
= U_INVALID_FORMAT_ERROR
;
267 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
271 *status
= U_ZERO_ERROR
;
275 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
277 struct UString
*tokenValue
;
281 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
283 if (U_FAILURE(*status
))
288 count
= u_strlen(tokenValue
->fChars
);
289 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
290 *status
= U_INVALID_FORMAT_ERROR
;
291 error(*line
, "invariant characters required for table keys, binary data, etc.");
295 result
= static_cast<char *>(uprv_malloc(count
+1));
299 *status
= U_MEMORY_ALLOCATION_ERROR
;
303 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
307 static struct SResource
*
308 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
310 struct SResource
*result
= NULL
;
311 struct UString
*tokenValue
;
312 FileStream
*file
= NULL
;
313 char filename
[256] = { '\0' };
314 char cs
[128] = { '\0' };
316 UBool quoted
= FALSE
;
317 UCHARBUF
*ucbuf
=NULL
;
319 const char* cp
= NULL
;
320 UChar
*pTarget
= NULL
;
321 UChar
*target
= NULL
;
322 UChar
*targetLimit
= NULL
;
325 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
328 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
331 if (U_FAILURE(*status
))
335 /* make the filename including the directory */
336 if (state
->inputdir
!= NULL
)
338 uprv_strcat(filename
, state
->inputdir
);
340 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
342 uprv_strcat(filename
, U_FILE_SEP_STRING
);
346 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
348 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
350 if (U_FAILURE(*status
))
354 uprv_strcat(filename
, cs
);
356 if(state
->omitCollationRules
) {
360 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
362 if (U_FAILURE(*status
)) {
363 error(line
, "An error occured while opening the input file %s\n", filename
);
367 /* We allocate more space than actually required
368 * since the actual size needed for storing UChars
369 * is not known in UTF-8 byte stream
371 size
= ucbuf_size(ucbuf
) + 1;
372 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
373 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
375 targetLimit
= pTarget
+size
;
377 /* read the rules into the buffer */
378 while (target
< targetLimit
)
380 c
= ucbuf_getc(ucbuf
, status
);
382 quoted
= (UBool
)!quoted
;
384 /* weiv (06/26/2002): adding the following:
385 * - preserving spaces in commands [...]
386 * - # comments until the end of line
388 if (c
== STARTCOMMAND
&& !quoted
)
391 * closing bracket will be handled by the
392 * append at the end of the loop
394 while(c
!= ENDCOMMAND
) {
395 U_APPEND_CHAR32_ONLY(c
, target
);
396 c
= ucbuf_getc(ucbuf
, status
);
399 else if (c
== HASH
&& !quoted
) {
401 while(c
!= CR
&& c
!= LF
) {
402 c
= ucbuf_getc(ucbuf
, status
);
406 else if (c
== ESCAPE
)
408 c
= unescape(ucbuf
, status
);
410 if (c
== (UChar32
)U_ERR
)
413 T_FileStream_close(file
);
417 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
419 /* ignore spaces carriage returns
420 * and line feed unless in the form \uXXXX
425 /* Append UChar * after dissembling if c > 0xffff*/
426 if (c
!= (UChar32
)U_EOF
)
428 U_APPEND_CHAR32_ONLY(c
, target
);
436 /* terminate the string */
437 if(target
< targetLimit
){
441 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
446 T_FileStream_close(file
);
451 static struct SResource
*
452 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
454 struct SResource
*result
= NULL
;
455 struct UString
*tokenValue
;
456 FileStream
*file
= NULL
;
457 char filename
[256] = { '\0' };
458 char cs
[128] = { '\0' };
460 UCHARBUF
*ucbuf
=NULL
;
461 const char* cp
= NULL
;
462 UChar
*pTarget
= NULL
;
463 const UChar
*pSource
= NULL
;
466 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
469 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
472 if (U_FAILURE(*status
))
476 /* make the filename including the directory */
477 if (state
->inputdir
!= NULL
)
479 uprv_strcat(filename
, state
->inputdir
);
481 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
483 uprv_strcat(filename
, U_FILE_SEP_STRING
);
487 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
489 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
491 if (U_FAILURE(*status
))
495 uprv_strcat(filename
, cs
);
498 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
500 if (U_FAILURE(*status
)) {
501 error(line
, "An error occured while opening the input file %s\n", filename
);
505 /* We allocate more space than actually required
506 * since the actual size needed for storing UChars
507 * is not known in UTF-8 byte stream
509 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
510 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
511 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
513 #if !UCONFIG_NO_TRANSLITERATION
514 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
517 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
519 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
523 T_FileStream_close(file
);
527 static ArrayResource
* dependencyArray
= NULL
;
529 static struct SResource
*
530 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
532 struct SResource
*result
= NULL
;
533 struct SResource
*elem
= NULL
;
534 struct UString
*tokenValue
;
536 char filename
[256] = { '\0' };
537 char cs
[128] = { '\0' };
539 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
542 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
545 if (U_FAILURE(*status
))
549 /* make the filename including the directory */
550 if (state
->outputdir
!= NULL
)
552 uprv_strcat(filename
, state
->outputdir
);
554 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
556 uprv_strcat(filename
, U_FILE_SEP_STRING
);
560 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
562 if (U_FAILURE(*status
))
566 uprv_strcat(filename
, cs
);
567 if(!T_FileStream_file_exists(filename
)){
569 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
571 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
574 if(dependencyArray
==NULL
){
575 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
578 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
580 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
582 dependencyArray
->add(elem
);
584 if (U_FAILURE(*status
))
588 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
591 static struct SResource
*
592 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
594 struct UString
*tokenValue
;
595 struct SResource
*result
= NULL
;
597 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
599 return parseUCARules(tag, startline, status);
602 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
604 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
606 if (U_SUCCESS(*status
))
608 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
609 doesn't survive expect either) */
611 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
612 if(U_SUCCESS(*status
) && result
) {
613 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
615 if (U_FAILURE(*status
))
626 static struct SResource
*
627 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
629 struct UString
*tokenValue
;
630 struct SResource
*result
= NULL
;
632 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
635 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
638 if (U_SUCCESS(*status
))
640 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
641 doesn't survive expect either) */
643 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
645 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
647 if (U_FAILURE(*status
))
657 #if !UCONFIG_NO_COLLATION
661 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
662 if (res
== res_none() || !res
->isTable()) {
666 TableResource
*list
= static_cast<TableResource
*>(res
);
667 SResource
*current
= list
->fFirst
;
668 while (current
!= NULL
) {
669 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
672 current
= current
->fNext
;
677 class GenrbImporter
: public icu::CollationRuleParser::Importer
{
679 GenrbImporter(const char *in
, const char *out
) : inputDir(in
), outputDir(out
) {}
680 virtual ~GenrbImporter();
681 virtual void getRules(
682 const char *localeID
, const char *collationType
,
683 UnicodeString
&rules
,
684 const char *&errorReason
, UErrorCode
&errorCode
);
687 const char *inputDir
;
688 const char *outputDir
;
691 GenrbImporter::~GenrbImporter() {}
694 GenrbImporter::getRules(
695 const char *localeID
, const char *collationType
,
696 UnicodeString
&rules
,
697 const char *& /*errorReason*/, UErrorCode
&errorCode
) {
698 CharString
filename(localeID
, errorCode
);
699 for(int32_t i
= 0; i
< filename
.length(); i
++){
700 if(filename
[i
] == '-'){
701 filename
.data()[i
] = '_';
704 filename
.append(".txt", errorCode
);
705 if (U_FAILURE(errorCode
)) {
708 CharString inputDirBuf
;
709 CharString openFileName
;
710 if(inputDir
== NULL
) {
711 const char *filenameBegin
= uprv_strrchr(filename
.data(), U_FILE_SEP_CHAR
);
712 if (filenameBegin
!= NULL
) {
714 * When a filename ../../../data/root.txt is specified,
715 * we presume that the input directory is ../../../data
716 * This is very important when the resource file includes
717 * another file, like UCARules.txt or thaidict.brk.
719 StringPiece dir
= filename
.toStringPiece();
720 const char *filenameLimit
= filename
.data() + filename
.length();
721 dir
.remove_suffix((int32_t)(filenameLimit
- filenameBegin
));
722 inputDirBuf
.append(dir
, errorCode
);
723 inputDir
= inputDirBuf
.data();
726 int32_t dirlen
= (int32_t)uprv_strlen(inputDir
);
728 if((filename
[0] != U_FILE_SEP_CHAR
) && (inputDir
[dirlen
-1] !='.')) {
730 * append the input dir to openFileName if the first char in
731 * filename is not file separator char and the last char input directory is not '.'.
732 * This is to support :
733 * genrb -s. /home/icu/data
735 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir
740 openFileName
.append(inputDir
, dirlen
, errorCode
);
741 if(inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
742 openFileName
.append(U_FILE_SEP_CHAR
, errorCode
);
746 openFileName
.append(filename
, errorCode
);
747 if(U_FAILURE(errorCode
)) {
750 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
752 LocalUCHARBUFPointer
ucbuf(
753 ucbuf_open(openFileName
.data(), &cp
, getShowWarning(), TRUE
, &errorCode
));
754 if(errorCode
== U_FILE_ACCESS_ERROR
) {
755 fprintf(stderr
, "couldn't open file %s\n", openFileName
.data());
758 if (ucbuf
.isNull() || U_FAILURE(errorCode
)) {
759 fprintf(stderr
, "An error occured processing file %s. Error: %s\n", openFileName
.data(), u_errorName(errorCode
));
763 /* Parse the data into an SRBRoot */
764 struct SRBRoot
*data
=
765 parse(ucbuf
.getAlias(), inputDir
, outputDir
, filename
.data(), FALSE
, FALSE
, &errorCode
);
766 if (U_FAILURE(errorCode
)) {
770 struct SResource
*root
= data
->fRoot
;
771 struct SResource
*collations
= resLookup(root
, "collations");
772 if (collations
!= NULL
) {
773 struct SResource
*collation
= resLookup(collations
, collationType
);
774 if (collation
!= NULL
) {
775 struct SResource
*sequence
= resLookup(collation
, "Sequence");
776 if (sequence
!= NULL
&& sequence
->isString()) {
777 // No string pointer aliasing so that we need not hold onto the resource bundle.
778 StringResource
*sr
= static_cast<StringResource
*>(sequence
);
785 // Quick-and-dirty escaping function.
786 // Assumes that we are on an ASCII-based platform.
788 escape(const UChar
*s
, char *buffer
) {
789 int32_t length
= u_strlen(s
);
793 U16_NEXT(s
, i
, length
, c
);
797 } else if (0x20 <= c
&& c
<= 0x7e) {
799 *buffer
++ = (char)c
; // assumes ASCII-based platform
801 buffer
+= sprintf(buffer
, "\\u%04X", (int)c
);
808 #endif // !UCONFIG_NO_COLLATION
810 static TableResource
*
811 addCollation(ParseState
* state
, TableResource
*result
, const char *collationType
,
812 uint32_t startline
, UErrorCode
*status
)
814 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
815 struct SResource
*member
= NULL
;
816 struct UString
*tokenValue
;
817 struct UString comment
;
818 enum ETokenType token
;
821 UBool haveRules
= FALSE
;
822 UVersionInfo version
;
825 /* '{' . (name resource)* '}' */
826 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
831 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
833 if (token
== TOK_CLOSE_BRACE
)
838 if (token
!= TOK_STRING
)
841 *status
= U_INVALID_FORMAT_ERROR
;
843 if (token
== TOK_EOF
)
845 error(startline
, "unterminated table");
849 error(line
, "Unexpected token %s", tokenNames
[token
]);
855 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
857 if (U_FAILURE(*status
))
863 member
= parseResource(state
, subtag
, NULL
, status
);
865 if (U_FAILURE(*status
))
872 // Ignore the parsed resources, continue parsing.
874 else if (uprv_strcmp(subtag
, "Version") == 0 && member
->isString())
876 StringResource
*sr
= static_cast<StringResource
*>(member
);
878 int32_t length
= sr
->length();
880 if (length
>= UPRV_LENGTHOF(ver
))
882 length
= UPRV_LENGTHOF(ver
) - 1;
885 sr
->fString
.extract(0, length
, ver
, UPRV_LENGTHOF(ver
), US_INV
);
886 u_versionFromString(version
, ver
);
888 result
->add(member
, line
, *status
);
891 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
893 /* discard duplicate %%CollationBin if any*/
895 else if (uprv_strcmp(subtag
, "Sequence") == 0 && member
->isString())
897 StringResource
*sr
= static_cast<StringResource
*>(member
);
900 // Defer building the collator until we have seen
901 // all sub-elements of the collation table, including the Version.
902 /* in order to achieve smaller data files, we can direct genrb */
903 /* to omit collation rules */
904 if(!state
->omitCollationRules
) {
905 result
->add(member
, line
, *status
);
909 else // Just copy non-special items.
911 result
->add(member
, line
, *status
);
914 res_close(member
); // TODO: use LocalPointer
915 if (U_FAILURE(*status
))
922 if (!haveRules
) { return result
; }
924 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
925 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
928 // CLDR ticket #3949, ICU ticket #8082:
929 // Do not build collation binary data for for-import-only "private" collation rule strings.
930 if (uprv_strncmp(collationType
, "private-", 8) == 0) {
932 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
937 if(!state
->makeBinaryCollation
) {
939 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
943 UErrorCode intStatus
= U_ZERO_ERROR
;
944 UParseError parseError
;
945 uprv_memset(&parseError
, 0, sizeof(parseError
));
946 GenrbImporter
importer(state
->inputdir
, state
->outputdir
);
947 const icu::CollationTailoring
*base
= icu::CollationRoot::getRoot(intStatus
);
948 if(U_FAILURE(intStatus
)) {
949 error(line
, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus
));
951 return NULL
; // TODO: use LocalUResourceBundlePointer for result
953 icu::CollationBuilder
builder(base
, intStatus
);
954 if(uprv_strncmp(collationType
, "search", 6) == 0) {
955 builder
.disableFastLatin(); // build fast-Latin table unless search collator
957 LocalPointer
<icu::CollationTailoring
> t(
958 builder
.parseAndBuild(rules
, version
, &importer
, &parseError
, intStatus
));
959 if(U_FAILURE(intStatus
)) {
960 const char *reason
= builder
.getErrorReason();
961 if(reason
== NULL
) { reason
= ""; }
962 error(line
, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
963 state
->filename
, collationType
,
964 (long)parseError
.offset
, u_errorName(intStatus
), reason
);
965 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
966 // Print pre- and post-context.
967 char preBuffer
[100], postBuffer
[100];
968 escape(parseError
.preContext
, preBuffer
);
969 escape(parseError
.postContext
, postBuffer
);
970 error(line
, " error context: \"...%s\" ! \"%s...\"", preBuffer
, postBuffer
);
972 if(isStrict() || t
.isNull()) {
978 icu::LocalMemory
<uint8_t> buffer
;
979 int32_t capacity
= 100000;
980 uint8_t *dest
= buffer
.allocateInsteadAndCopy(capacity
);
982 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
984 *status
= U_MEMORY_ALLOCATION_ERROR
;
988 int32_t indexes
[icu::CollationDataReader::IX_TOTAL_SIZE
+ 1];
989 int32_t totalSize
= icu::CollationDataWriter::writeTailoring(
990 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
991 if(intStatus
== U_BUFFER_OVERFLOW_ERROR
) {
992 intStatus
= U_ZERO_ERROR
;
993 capacity
= totalSize
;
994 dest
= buffer
.allocateInsteadAndCopy(capacity
);
996 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
998 *status
= U_MEMORY_ALLOCATION_ERROR
;
1002 totalSize
= icu::CollationDataWriter::writeTailoring(
1003 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
1005 if(U_FAILURE(intStatus
)) {
1006 fprintf(stderr
, "CollationDataWriter::writeTailoring() failed: %s\n",
1007 u_errorName(intStatus
));
1012 printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
);
1013 icu::CollationInfo::printSizes(totalSize
, indexes
);
1014 if(t
->settings
->hasReordering()) {
1015 printf("%s~%s collation reordering ranges:\n", state
->filename
, collationType
);
1016 icu::CollationInfo::printReorderRanges(
1017 *t
->data
, t
->settings
->reorderCodes
, t
->settings
->reorderCodesLength
);
1020 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", totalSize
, dest
, NULL
, NULL
, status
);
1021 result
->add(collationBin
, line
, *status
);
1022 if (U_FAILURE(*status
)) {
1031 keepCollationType(const char * /*type*/) {
1035 static struct SResource
*
1036 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1038 TableResource
*result
= NULL
;
1039 struct SResource
*member
= NULL
;
1040 struct UString
*tokenValue
;
1041 struct UString comment
;
1042 enum ETokenType token
;
1043 char subtag
[1024], typeKeyword
[1024];
1046 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1048 if (result
== NULL
|| U_FAILURE(*status
))
1053 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1056 return addCollation(state
, result
, "(no type)", startline
, status
);
1060 ustr_init(&comment
);
1061 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1063 if (token
== TOK_CLOSE_BRACE
)
1068 if (token
!= TOK_STRING
)
1071 *status
= U_INVALID_FORMAT_ERROR
;
1073 if (token
== TOK_EOF
)
1075 error(startline
, "unterminated table");
1079 error(line
, "Unexpected token %s", tokenNames
[token
]);
1085 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1087 if (U_FAILURE(*status
))
1093 if (uprv_strcmp(subtag
, "default") == 0)
1095 member
= parseResource(state
, subtag
, NULL
, status
);
1097 if (U_FAILURE(*status
))
1103 result
->add(member
, line
, *status
);
1107 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1108 /* this probably needs to be refactored or recursively use the parser */
1109 /* first we assume that our collation table won't have the explicit type */
1110 /* then, we cannot handle aliases */
1111 if(token
== TOK_OPEN_BRACE
) {
1112 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1113 TableResource
*collationRes
;
1114 if (keepCollationType(subtag
)) {
1115 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1117 collationRes
= NULL
;
1119 // need to parse the collation data regardless
1120 collationRes
= addCollation(state
, collationRes
, subtag
, startline
, status
);
1121 if (collationRes
!= NULL
) {
1122 result
->add(collationRes
, startline
, *status
);
1124 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1125 /* we could have a table too */
1126 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1127 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1128 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1129 member
= parseResource(state
, subtag
, NULL
, status
);
1130 if (U_FAILURE(*status
))
1136 result
->add(member
, line
, *status
);
1139 *status
= U_INVALID_FORMAT_ERROR
;
1144 *status
= U_INVALID_FORMAT_ERROR
;
1149 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1151 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1153 if (U_FAILURE(*status
))
1162 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1163 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1164 static struct SResource
*
1165 realParseTable(ParseState
* state
, TableResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1167 struct SResource
*member
= NULL
;
1168 struct UString
*tokenValue
=NULL
;
1169 struct UString comment
;
1170 enum ETokenType token
;
1173 UBool readToken
= FALSE
;
1175 /* '{' . (name resource)* '}' */
1178 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1182 ustr_init(&comment
);
1183 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1185 if (token
== TOK_CLOSE_BRACE
)
1188 warning(startline
, "Encountered empty table");
1193 if (token
!= TOK_STRING
)
1195 *status
= U_INVALID_FORMAT_ERROR
;
1197 if (token
== TOK_EOF
)
1199 error(startline
, "unterminated table");
1203 error(line
, "unexpected token %s", tokenNames
[token
]);
1209 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
1210 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1212 *status
= U_INVALID_FORMAT_ERROR
;
1213 error(line
, "invariant characters required for table keys");
1217 if (U_FAILURE(*status
))
1219 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
1223 member
= parseResource(state
, subtag
, &comment
, status
);
1225 if (member
== NULL
|| U_FAILURE(*status
))
1227 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1231 table
->add(member
, line
, *status
);
1233 if (U_FAILURE(*status
))
1235 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1239 ustr_deinit(&comment
);
1243 /* A compiler warning will appear if all paths don't contain a return statement. */
1244 /* *status = U_INTERNAL_PROGRAM_ERROR;
1248 static struct SResource
*
1249 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1251 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1253 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1255 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1257 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1260 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1263 TableResource
*result
= table_open(state
->bundle
, tag
, comment
, status
);
1265 if (result
== NULL
|| U_FAILURE(*status
))
1269 return realParseTable(state
, result
, tag
, startline
, status
);
1272 static struct SResource
*
1273 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1275 struct SResource
*member
= NULL
;
1276 struct UString
*tokenValue
;
1277 struct UString memberComments
;
1278 enum ETokenType token
;
1279 UBool readToken
= FALSE
;
1281 ArrayResource
*result
= array_open(state
->bundle
, tag
, comment
, status
);
1283 if (result
== NULL
|| U_FAILURE(*status
))
1288 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1291 ustr_init(&memberComments
);
1293 /* '{' . resource [','] '}' */
1297 ustr_setlen(&memberComments
, 0, status
);
1299 /* check for end of array, but don't consume next token unless it really is the end */
1300 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1303 if (token
== TOK_CLOSE_BRACE
)
1305 getToken(state
, NULL
, NULL
, NULL
, status
);
1307 warning(startline
, "Encountered empty array");
1312 if (token
== TOK_EOF
)
1315 *status
= U_INVALID_FORMAT_ERROR
;
1316 error(startline
, "unterminated array");
1320 /* string arrays are a special case */
1321 if (token
== TOK_STRING
)
1323 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
1324 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1328 member
= parseResource(state
, NULL
, &memberComments
, status
);
1331 if (member
== NULL
|| U_FAILURE(*status
))
1337 result
->add(member
);
1339 /* eat optional comma if present */
1340 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1342 if (token
== TOK_COMMA
)
1344 getToken(state
, NULL
, NULL
, NULL
, status
);
1347 if (U_FAILURE(*status
))
1355 ustr_deinit(&memberComments
);
1359 static struct SResource
*
1360 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1362 enum ETokenType token
;
1365 UBool readToken
= FALSE
;
1368 struct UString memberComments
;
1370 IntVectorResource
*result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1372 if (result
== NULL
|| U_FAILURE(*status
))
1378 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1380 ustr_init(&memberComments
);
1381 /* '{' . string [','] '}' */
1384 ustr_setlen(&memberComments
, 0, status
);
1386 /* check for end of array, but don't consume next token unless it really is the end */
1387 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1389 if (token
== TOK_CLOSE_BRACE
)
1391 /* it's the end, consume the close brace */
1392 getToken(state
, NULL
, NULL
, NULL
, status
);
1394 warning(startline
, "Encountered empty int vector");
1396 ustr_deinit(&memberComments
);
1400 string
= getInvariantString(state
, NULL
, NULL
, status
);
1402 if (U_FAILURE(*status
))
1408 /* For handling illegal char in the Intvector */
1409 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1410 len
=(uint32_t)(stopstring
-string
);
1412 if(len
==uprv_strlen(string
))
1414 result
->add(value
, *status
);
1416 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1421 *status
=U_INVALID_CHAR_FOUND
;
1424 if (U_FAILURE(*status
))
1430 /* the comma is optional (even though it is required to prevent the reader from concatenating
1431 consecutive entries) so that a missing comma on the last entry isn't an error */
1432 if (token
== TOK_COMMA
)
1434 getToken(state
, NULL
, NULL
, NULL
, status
);
1440 /* A compiler warning will appear if all paths don't contain a return statement. */
1441 /* intvector_close(result, status);
1442 *status = U_INTERNAL_PROGRAM_ERROR;
1446 static struct SResource
*
1447 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1450 LocalMemory
<char> string(getInvariantString(state
, &line
, NULL
, status
));
1451 if (string
.isNull() || U_FAILURE(*status
))
1456 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1457 if (U_FAILURE(*status
))
1463 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1466 uint32_t count
= (uint32_t)uprv_strlen(string
.getAlias());
1469 LocalMemory
<uint8_t> value
;
1470 if (value
.allocateInsteadAndCopy(count
) == NULL
)
1472 *status
= U_MEMORY_ALLOCATION_ERROR
;
1476 char toConv
[3] = {'\0', '\0', '\0'};
1477 for (uint32_t i
= 0; i
< count
; i
+= 2)
1479 toConv
[0] = string
[i
];
1480 toConv
[1] = string
[i
+ 1];
1483 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1484 uint32_t len
=(uint32_t)(stopstring
-toConv
);
1488 *status
=U_INVALID_CHAR_FOUND
;
1493 return bin_open(state
->bundle
, tag
, count
>> 1, value
.getAlias(), NULL
, comment
, status
);
1497 *status
= U_INVALID_CHAR_FOUND
;
1498 error(line
, "Encountered invalid binary value (length is odd)");
1504 warning(startline
, "Encountered empty binary value");
1505 return bin_open(state
->bundle
, tag
, 0, NULL
, "", comment
, status
);
1509 static struct SResource
*
1510 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1512 struct SResource
*result
= NULL
;
1518 string
= getInvariantString(state
, NULL
, NULL
, status
);
1520 if (string
== NULL
|| U_FAILURE(*status
))
1525 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1527 if (U_FAILURE(*status
))
1534 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1537 if (uprv_strlen(string
) <= 0)
1539 warning(startline
, "Encountered empty integer. Default value is 0.");
1542 /* Allow integer support for hexdecimal, octal digit and decimal*/
1543 /* and handle illegal char in the integer*/
1544 value
= uprv_strtoul(string
, &stopstring
, 0);
1545 len
=(uint32_t)(stopstring
-string
);
1546 if(len
==uprv_strlen(string
))
1548 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1552 *status
=U_INVALID_CHAR_FOUND
;
1559 static struct SResource
*
1560 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1563 LocalMemory
<char> filename(getInvariantString(state
, &line
, NULL
, status
));
1564 if (U_FAILURE(*status
))
1569 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1571 if (U_FAILURE(*status
))
1577 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1580 /* Open the input file for reading */
1581 CharString fullname
;
1582 if (state
->inputdir
!= NULL
) {
1583 fullname
.append(state
->inputdir
, *status
);
1585 fullname
.appendPathPart(filename
.getAlias(), *status
);
1586 if (U_FAILURE(*status
)) {
1590 FileStream
*file
= T_FileStream_open(fullname
.data(), "rb");
1593 error(line
, "couldn't open input file %s", filename
.getAlias());
1594 *status
= U_FILE_ACCESS_ERROR
;
1598 int32_t len
= T_FileStream_size(file
);
1599 LocalMemory
<uint8_t> data
;
1600 if(data
.allocateInsteadAndCopy(len
) == NULL
)
1602 *status
= U_MEMORY_ALLOCATION_ERROR
;
1603 T_FileStream_close (file
);
1607 /* int32_t numRead = */ T_FileStream_read(file
, data
.getAlias(), len
);
1608 T_FileStream_close (file
);
1610 return bin_open(state
->bundle
, tag
, len
, data
.getAlias(), fullname
.data(), comment
, status
);
1613 static struct SResource
*
1614 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1616 struct SResource
*result
;
1620 UChar
*pTarget
= NULL
;
1623 char *fullname
= NULL
;
1625 const char* cp
= NULL
;
1626 const UChar
* uBuffer
= NULL
;
1628 filename
= getInvariantString(state
, &line
, NULL
, status
);
1629 count
= (int32_t)uprv_strlen(filename
);
1631 if (U_FAILURE(*status
))
1636 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1638 if (U_FAILURE(*status
))
1640 uprv_free(filename
);
1645 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1648 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1650 if(fullname
== NULL
)
1652 *status
= U_MEMORY_ALLOCATION_ERROR
;
1653 uprv_free(filename
);
1657 if(state
->inputdir
!=NULL
){
1658 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1661 uprv_strcpy(fullname
, state
->inputdir
);
1663 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1664 fullname
[state
->inputdirLength
+ 1] = '\0';
1666 uprv_strcat(fullname
, filename
);
1670 uprv_strcpy(fullname
, state
->inputdir
);
1671 uprv_strcat(fullname
, filename
);
1674 uprv_strcpy(fullname
,filename
);
1677 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1679 if (U_FAILURE(*status
)) {
1680 error(line
, "couldn't open input file %s\n", filename
);
1684 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1685 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
1691 uprv_free(filename
);
1692 uprv_free(fullname
);
1701 U_STRING_DECL(k_type_string
, "string", 6);
1702 U_STRING_DECL(k_type_binary
, "binary", 6);
1703 U_STRING_DECL(k_type_bin
, "bin", 3);
1704 U_STRING_DECL(k_type_table
, "table", 5);
1705 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1706 U_STRING_DECL(k_type_int
, "int", 3);
1707 U_STRING_DECL(k_type_integer
, "integer", 7);
1708 U_STRING_DECL(k_type_array
, "array", 5);
1709 U_STRING_DECL(k_type_alias
, "alias", 5);
1710 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1711 U_STRING_DECL(k_type_import
, "import", 6);
1712 U_STRING_DECL(k_type_include
, "include", 7);
1714 /* Various non-standard processing plugins that create one or more special resources. */
1715 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1716 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1717 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1718 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/*
 * Resource types recognised by the parser.
 *
 * The values are used as indexes into gResourceTypes[], so the order here
 * must match the order of the rows in that table.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,                /* no explicit type given / not yet determined */
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INT_VECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED                /* end marker; also the result for an unrecognised type name */
} EResourceType;
1741 const char *nameChars
; /* only used for debugging */
1742 const UChar
*nameUChars
;
1743 ParseResourceFunction
*parseFunction
;
1744 } gResourceTypes
[] = {
1745 {"Unknown", NULL
, NULL
},
1746 {"string", k_type_string
, parseString
},
1747 {"binary", k_type_binary
, parseBinary
},
1748 {"table", k_type_table
, parseTable
},
1749 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1750 {"integer", k_type_integer
, parseInteger
},
1751 {"array", k_type_array
, parseArray
},
1752 {"alias", k_type_alias
, parseAlias
},
1753 {"intvector", k_type_intvector
, parseIntVector
},
1754 {"import", k_type_import
, parseImport
},
1755 {"include", k_type_include
, parseInclude
},
1756 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1757 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1758 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1759 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
1760 {"reserved", NULL
, NULL
}
1765 U_STRING_INIT(k_type_string
, "string", 6);
1766 U_STRING_INIT(k_type_binary
, "binary", 6);
1767 U_STRING_INIT(k_type_bin
, "bin", 3);
1768 U_STRING_INIT(k_type_table
, "table", 5);
1769 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1770 U_STRING_INIT(k_type_int
, "int", 3);
1771 U_STRING_INIT(k_type_integer
, "integer", 7);
1772 U_STRING_INIT(k_type_array
, "array", 5);
1773 U_STRING_INIT(k_type_alias
, "alias", 5);
1774 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1775 U_STRING_INIT(k_type_import
, "import", 6);
1776 U_STRING_INIT(k_type_include
, "include", 7);
1778 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1779 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1780 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1781 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
1784 static inline UBool
isTable(enum EResourceType type
) {
1785 return (UBool
)(type
==RESTYPE_TABLE
|| type
==RESTYPE_TABLE_NO_FALLBACK
);
1788 static enum EResourceType
1789 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1791 struct UString
*tokenValue
;
1792 struct UString comment
;
1793 enum EResourceType result
= RESTYPE_UNKNOWN
;
1795 ustr_init(&comment
);
1796 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1798 if (U_FAILURE(*status
))
1800 return RESTYPE_UNKNOWN
;
1803 *status
= U_ZERO_ERROR
;
1805 /* Search for normal types */
1806 result
=RESTYPE_UNKNOWN
;
1807 while ((result
=(EResourceType
)(result
+1)) < RESTYPE_RESERVED
) {
1808 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1812 /* Now search for the aliases */
1813 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1814 result
= RESTYPE_INTEGER
;
1816 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1817 result
= RESTYPE_BINARY
;
1819 else if (result
== RESTYPE_RESERVED
) {
1820 char tokenBuffer
[1024];
1821 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
1822 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1823 *status
= U_INVALID_FORMAT_ERROR
;
1824 error(line
, "unknown resource type '%s'", tokenBuffer
);
1830 /* parse a non-top-level resource */
1831 static struct SResource
*
1832 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1834 enum ETokenType token
;
1835 enum EResourceType resType
= RESTYPE_UNKNOWN
;
1836 ParseResourceFunction
*parseFunction
= NULL
;
1837 struct UString
*tokenValue
;
1842 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1845 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1848 /* name . [ ':' type ] '{' resource '}' */
1849 /* This function parses from the colon onwards. If the colon is present, parse the
1850 type then try to parse a resource of that type. If there is no explicit type,
1851 work it out using the lookahead tokens. */
1855 *status
= U_INVALID_FORMAT_ERROR
;
1856 error(startline
, "Unexpected EOF encountered");
1860 *status
= U_INVALID_FORMAT_ERROR
;
1864 resType
= parseResourceType(state
, status
);
1865 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1867 if (U_FAILURE(*status
))
1874 case TOK_OPEN_BRACE
:
1878 *status
= U_INVALID_FORMAT_ERROR
;
1879 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
1884 if (resType
== RESTYPE_UNKNOWN
)
1886 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1887 We could have any of the following:
1888 { { => array (nested)
1890 { string , => string array
1894 { string :/{ => table
1895 { string } => string
1898 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
1900 if (U_FAILURE(*status
))
1905 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
1907 resType
= RESTYPE_ARRAY
;
1909 else if (token
== TOK_STRING
)
1911 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
1913 if (U_FAILURE(*status
))
1920 case TOK_COMMA
: resType
= RESTYPE_ARRAY
; break;
1921 case TOK_OPEN_BRACE
: resType
= RESTYPE_TABLE
; break;
1922 case TOK_CLOSE_BRACE
: resType
= RESTYPE_STRING
; break;
1923 case TOK_COLON
: resType
= RESTYPE_TABLE
; break;
1925 *status
= U_INVALID_FORMAT_ERROR
;
1926 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
1932 *status
= U_INVALID_FORMAT_ERROR
;
1933 error(line
, "Unexpected token after '{'");
1937 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1938 } else if(resType
== RESTYPE_TABLE_NO_FALLBACK
) {
1939 *status
= U_INVALID_FORMAT_ERROR
;
1940 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
1945 /* We should now know what we need to parse next, so call the appropriate parser
1946 function and return. */
1947 parseFunction
= gResourceTypes
[resType
].parseFunction
;
1948 if (parseFunction
!= NULL
) {
1949 return parseFunction(state
, tag
, startline
, comment
, status
);
1952 *status
= U_INTERNAL_PROGRAM_ERROR
;
1953 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
1959 /* parse the top-level resource */
1961 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, const char *filename
,
1962 UBool makeBinaryCollation
, UBool omitCollationRules
, UErrorCode
*status
)
1964 struct UString
*tokenValue
;
1965 struct UString comment
;
1967 enum EResourceType bundleType
;
1968 enum ETokenType token
;
1973 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
1975 ustr_init(&state
.lookahead
[i
].value
);
1976 ustr_init(&state
.lookahead
[i
].comment
);
1979 initLookahead(&state
, buf
, status
);
1981 state
.inputdir
= inputDir
;
1982 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
1983 state
.outputdir
= outputDir
;
1984 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
1985 state
.filename
= filename
;
1986 state
.makeBinaryCollation
= makeBinaryCollation
;
1987 state
.omitCollationRules
= omitCollationRules
;
1989 ustr_init(&comment
);
1990 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
1992 state
.bundle
= new SRBRoot(&comment
, FALSE
, *status
);
1994 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2000 state
.bundle
->setLocale(tokenValue
->fChars
, *status
);
2002 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2003 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2004 if(token
==TOK_COLON
) {
2005 *status
=U_ZERO_ERROR
;
2006 bundleType
=parseResourceType(&state
, status
);
2008 if(isTable(bundleType
))
2010 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2014 *status
=U_PARSE_ERROR
;
2015 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2021 if(token
==TOK_OPEN_BRACE
)
2023 *status
=U_ZERO_ERROR
;
2024 bundleType
=RESTYPE_TABLE
;
2028 /* neither colon nor open brace */
2029 *status
=U_PARSE_ERROR
;
2030 bundleType
=RESTYPE_UNKNOWN
;
2031 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2035 if (U_FAILURE(*status
))
2037 delete state
.bundle
;
2041 if(bundleType
==RESTYPE_TABLE_NO_FALLBACK
) {
2043 * Parse a top-level table with the table(nofallback) declaration.
2044 * This is the same as a regular table, but also sets the
2045 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2047 state
.bundle
->fNoFallback
=TRUE
;
2049 /* top-level tables need not handle special table names like "collations" */
2050 assert(!state
.bundle
->fIsPoolBundle
);
2051 assert(state
.bundle
->fRoot
->fType
== URES_TABLE
);
2052 TableResource
*rootTable
= static_cast<TableResource
*>(state
.bundle
->fRoot
);
2053 realParseTable(&state
, rootTable
, NULL
, line
, status
);
2054 if(dependencyArray
!=NULL
){
2055 rootTable
->add(dependencyArray
, 0, *status
);
2056 dependencyArray
= NULL
;
2058 if (U_FAILURE(*status
))
2060 delete state
.bundle
;
2061 res_close(dependencyArray
);
2065 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2067 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2069 *status
= U_INVALID_FORMAT_ERROR
;
2074 cleanupLookahead(&state
);
2075 ustr_deinit(&comment
);
2076 return state
.bundle
;