1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1998-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
13 * Modification History:
15 * Date Name Description
16 * 05/26/99 stephen Creation.
17 * 02/25/00 weiv Overhaul to write udata
18 * 5/10/01 Ram removed ustdio dependency
19 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
20 *******************************************************************************
23 // Safer use of UnicodeString.
24 #ifndef UNISTR_FROM_CHAR_EXPLICIT
25 # define UNISTR_FROM_CHAR_EXPLICIT explicit
28 // Less important, but still a good idea.
29 #ifndef UNISTR_FROM_STRING_EXPLICIT
30 # define UNISTR_FROM_STRING_EXPLICIT explicit
45 #include "unicode/stringpiece.h"
46 #include "unicode/unistr.h"
47 #include "unicode/ustring.h"
48 #include "unicode/uscript.h"
49 #include "unicode/utf16.h"
50 #include "unicode/putil.h"
52 #include "collationbuilder.h"
53 #include "collationdata.h"
54 #include "collationdatareader.h"
55 #include "collationdatawriter.h"
56 #include "collationfastlatinbuilder.h"
57 #include "collationinfo.h"
58 #include "collationroot.h"
59 #include "collationruleparser.h"
60 #include "collationtailoring.h"
63 /* Number of tokens to read ahead of the current stream position */
64 #define MAX_LOOKAHEAD 3
74 #define STARTCOMMAND 0x005B
75 #define ENDCOMMAND 0x005D
76 #define OPENSQBRACKET 0x005B
77 #define CLOSESQBRACKET 0x005D
79 using icu::CharString
;
80 using icu::LocalMemory
;
81 using icu::LocalPointer
;
82 using icu::LocalUCHARBUFPointer
;
83 using icu::StringPiece
;
84 using icu::UnicodeString
;
90 struct UString comment
;
94 /* keep in sync with token defines in read.h */
95 const char *tokenNames
[TOK_TOKEN_COUNT
] =
97 "string", /* A string token, such as "MonthNames" */
98 "'{'", /* An opening brace character */
99 "'}'", /* A closing brace character */
103 "<end of file>", /* End of the file has been reached successfully */
107 /* Just to store "TRUE" */
108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
111 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
112 uint32_t lookaheadPosition
;
114 struct SRBRoot
*bundle
;
115 const char *inputdir
;
116 uint32_t inputdirLength
;
117 const char *outputdir
;
118 uint32_t outputdirLength
;
119 const char *filename
;
120 UBool makeBinaryCollation
;
121 UBool omitCollationRules
;
124 typedef struct SResource
*
125 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
127 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
129 /* The nature of the lookahead buffer:
130 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
131 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
132 When getToken is called, the current pointer is moved to the next slot and the
133 old slot is filled with the next token from the reader by calling getNextToken.
134 The token values are stored in the slot, which means that token values don't
135 survive a call to getToken, ie.
139 getToken(&value, NULL, status);
140 getToken(NULL, NULL, status); bad - value is now a different string
143 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
145 static uint32_t initTypeStrings
= 0;
148 if (!initTypeStrings
)
153 state
->lookaheadPosition
= 0;
158 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
160 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
161 if (U_FAILURE(*status
))
167 *status
= U_ZERO_ERROR
;
171 cleanupLookahead(ParseState
* state
)
174 for (i
= 0; i
<= MAX_LOOKAHEAD
; i
++)
176 ustr_deinit(&state
->lookahead
[i
].value
);
177 ustr_deinit(&state
->lookahead
[i
].comment
);
182 static enum ETokenType
183 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
185 enum ETokenType result
;
188 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
190 if (tokenValue
!= NULL
)
192 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
195 if (linenumber
!= NULL
)
197 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
202 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
205 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
206 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
207 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
208 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
209 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
211 /* printf("getToken, returning %s\n", tokenNames[result]); */
216 static enum ETokenType
217 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
219 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
221 if (U_FAILURE(*status
))
226 if (lookaheadCount
>= MAX_LOOKAHEAD
)
228 *status
= U_INTERNAL_PROGRAM_ERROR
;
232 if (tokenValue
!= NULL
)
234 *tokenValue
= &state
->lookahead
[i
].value
;
237 if (linenumber
!= NULL
)
239 *linenumber
= state
->lookahead
[i
].line
;
243 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
246 return state
->lookahead
[i
].type
;
250 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
254 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
256 if (linenumber
!= NULL
)
261 if (U_FAILURE(*status
))
266 if (token
!= expectedToken
)
268 *status
= U_INVALID_FORMAT_ERROR
;
269 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
273 *status
= U_ZERO_ERROR
;
277 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
,
278 int32_t &stringLength
, UErrorCode
*status
)
280 struct UString
*tokenValue
;
283 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
285 if (U_FAILURE(*status
))
290 if(!uprv_isInvariantUString(tokenValue
->fChars
, tokenValue
->fLength
)) {
291 *status
= U_INVALID_FORMAT_ERROR
;
292 error(*line
, "invariant characters required for table keys, binary data, etc.");
296 result
= static_cast<char *>(uprv_malloc(tokenValue
->fLength
+1));
300 *status
= U_MEMORY_ALLOCATION_ERROR
;
304 u_UCharsToChars(tokenValue
->fChars
, result
, tokenValue
->fLength
+1);
305 stringLength
= tokenValue
->fLength
;
309 static struct SResource
*
310 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
312 struct SResource
*result
= NULL
;
313 struct UString
*tokenValue
;
314 FileStream
*file
= NULL
;
315 char filename
[256] = { '\0' };
316 char cs
[128] = { '\0' };
318 UBool quoted
= FALSE
;
319 UCHARBUF
*ucbuf
=NULL
;
321 const char* cp
= NULL
;
322 UChar
*pTarget
= NULL
;
323 UChar
*target
= NULL
;
324 UChar
*targetLimit
= NULL
;
327 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
330 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
333 if (U_FAILURE(*status
))
337 /* make the filename including the directory */
338 if (state
->inputdir
!= NULL
)
340 uprv_strcat(filename
, state
->inputdir
);
342 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
344 uprv_strcat(filename
, U_FILE_SEP_STRING
);
348 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
350 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
352 if (U_FAILURE(*status
))
356 uprv_strcat(filename
, cs
);
358 if(state
->omitCollationRules
) {
362 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
364 if (U_FAILURE(*status
)) {
365 error(line
, "An error occurred while opening the input file %s\n", filename
);
369 /* We allocate more space than actually required
370 * since the actual size needed for storing UChars
371 * is not known in UTF-8 byte stream
373 size
= ucbuf_size(ucbuf
) + 1;
374 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
375 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
377 targetLimit
= pTarget
+size
;
379 /* read the rules into the buffer */
380 while (target
< targetLimit
)
382 c
= ucbuf_getc(ucbuf
, status
);
384 quoted
= (UBool
)!quoted
;
386 /* weiv (06/26/2002): adding the following:
387 * - preserving spaces in commands [...]
388 * - # comments until the end of line
390 if (c
== STARTCOMMAND
&& !quoted
)
393 * closing bracket will be handled by the
394 * append at the end of the loop
396 while(c
!= ENDCOMMAND
) {
397 U_APPEND_CHAR32_ONLY(c
, target
);
398 c
= ucbuf_getc(ucbuf
, status
);
401 else if (c
== HASH
&& !quoted
) {
403 while(c
!= CR
&& c
!= LF
) {
404 c
= ucbuf_getc(ucbuf
, status
);
408 else if (c
== ESCAPE
)
410 c
= unescape(ucbuf
, status
);
412 if (c
== (UChar32
)U_ERR
)
415 T_FileStream_close(file
);
419 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
421 /* ignore spaces carriage returns
422 * and line feed unless in the form \uXXXX
427 /* Append UChar * after dissembling if c > 0xffff*/
428 if (c
!= (UChar32
)U_EOF
)
430 U_APPEND_CHAR32_ONLY(c
, target
);
438 /* terminate the string */
439 if(target
< targetLimit
){
443 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
448 T_FileStream_close(file
);
453 static struct SResource
*
454 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
456 struct SResource
*result
= NULL
;
457 struct UString
*tokenValue
;
458 FileStream
*file
= NULL
;
459 char filename
[256] = { '\0' };
460 char cs
[128] = { '\0' };
462 UCHARBUF
*ucbuf
=NULL
;
463 const char* cp
= NULL
;
464 UChar
*pTarget
= NULL
;
465 const UChar
*pSource
= NULL
;
468 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
471 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
474 if (U_FAILURE(*status
))
478 /* make the filename including the directory */
479 if (state
->inputdir
!= NULL
)
481 uprv_strcat(filename
, state
->inputdir
);
483 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
485 uprv_strcat(filename
, U_FILE_SEP_STRING
);
489 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
491 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
493 if (U_FAILURE(*status
))
497 uprv_strcat(filename
, cs
);
500 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
502 if (U_FAILURE(*status
)) {
503 error(line
, "An error occurred while opening the input file %s\n", filename
);
507 /* We allocate more space than actually required
508 * since the actual size needed for storing UChars
509 * is not known in UTF-8 byte stream
511 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
512 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
513 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
515 #if !UCONFIG_NO_TRANSLITERATION
516 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
519 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
521 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
525 T_FileStream_close(file
);
529 static ArrayResource
* dependencyArray
= NULL
;
531 static struct SResource
*
532 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
534 struct SResource
*result
= NULL
;
535 struct SResource
*elem
= NULL
;
536 struct UString
*tokenValue
;
538 char filename
[256] = { '\0' };
539 char cs
[128] = { '\0' };
541 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
544 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
547 if (U_FAILURE(*status
))
551 /* make the filename including the directory */
552 if (state
->outputdir
!= NULL
)
554 uprv_strcat(filename
, state
->outputdir
);
556 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
558 uprv_strcat(filename
, U_FILE_SEP_STRING
);
562 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
564 if (U_FAILURE(*status
))
568 uprv_strcat(filename
, cs
);
569 if(!T_FileStream_file_exists(filename
)){
571 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
573 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
576 if(dependencyArray
==NULL
){
577 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
580 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
582 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
584 dependencyArray
->add(elem
);
586 if (U_FAILURE(*status
))
590 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
593 static struct SResource
*
594 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
596 struct UString
*tokenValue
;
597 struct SResource
*result
= NULL
;
599 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
601 return parseUCARules(tag, startline, status);
604 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
606 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
608 if (U_SUCCESS(*status
))
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
613 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
614 if(U_SUCCESS(*status
) && result
) {
615 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
617 if (U_FAILURE(*status
))
628 static struct SResource
*
629 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
631 struct UString
*tokenValue
;
632 struct SResource
*result
= NULL
;
634 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
637 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
640 if (U_SUCCESS(*status
))
642 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
643 doesn't survive expect either) */
645 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
647 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
649 if (U_FAILURE(*status
))
659 #if !UCONFIG_NO_COLLATION
663 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
664 if (res
== res_none() || !res
->isTable()) {
668 TableResource
*list
= static_cast<TableResource
*>(res
);
669 SResource
*current
= list
->fFirst
;
670 while (current
!= NULL
) {
671 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
674 current
= current
->fNext
;
679 class GenrbImporter
: public icu::CollationRuleParser::Importer
{
681 GenrbImporter(const char *in
, const char *out
) : inputDir(in
), outputDir(out
) {}
682 virtual ~GenrbImporter();
683 virtual void getRules(
684 const char *localeID
, const char *collationType
,
685 UnicodeString
&rules
,
686 const char *&errorReason
, UErrorCode
&errorCode
);
689 const char *inputDir
;
690 const char *outputDir
;
693 GenrbImporter::~GenrbImporter() {}
696 GenrbImporter::getRules(
697 const char *localeID
, const char *collationType
,
698 UnicodeString
&rules
,
699 const char *& /*errorReason*/, UErrorCode
&errorCode
) {
700 CharString
filename(localeID
, errorCode
);
701 for(int32_t i
= 0; i
< filename
.length(); i
++){
702 if(filename
[i
] == '-'){
703 filename
.data()[i
] = '_';
706 filename
.append(".txt", errorCode
);
707 if (U_FAILURE(errorCode
)) {
710 CharString inputDirBuf
;
711 CharString openFileName
;
712 if(inputDir
== NULL
) {
713 const char *filenameBegin
= uprv_strrchr(filename
.data(), U_FILE_SEP_CHAR
);
714 if (filenameBegin
!= NULL
) {
716 * When a filename ../../../data/root.txt is specified,
717 * we presume that the input directory is ../../../data
718 * This is very important when the resource file includes
719 * another file, like UCARules.txt or thaidict.brk.
721 StringPiece dir
= filename
.toStringPiece();
722 const char *filenameLimit
= filename
.data() + filename
.length();
723 dir
.remove_suffix((int32_t)(filenameLimit
- filenameBegin
));
724 inputDirBuf
.append(dir
, errorCode
);
725 inputDir
= inputDirBuf
.data();
728 int32_t dirlen
= (int32_t)uprv_strlen(inputDir
);
730 if((filename
[0] != U_FILE_SEP_CHAR
) && (inputDir
[dirlen
-1] !='.')) {
732 * append the input dir to openFileName if the first char in
733 * filename is not file separator char and the last char input directory is not '.'.
734 * This is to support :
735 * genrb -s. /home/icu/data
737 * The user cannot mix notations like
738 * genrb -s. /icu/data --- the absolute path specified. -s redundant
740 * genrb -s. icu/data --- start from CWD and look in icu/data dir
742 openFileName
.append(inputDir
, dirlen
, errorCode
);
743 if(inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
744 openFileName
.append(U_FILE_SEP_CHAR
, errorCode
);
748 openFileName
.append(filename
, errorCode
);
749 if(U_FAILURE(errorCode
)) {
752 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
754 LocalUCHARBUFPointer
ucbuf(
755 ucbuf_open(openFileName
.data(), &cp
, getShowWarning(), TRUE
, &errorCode
));
756 if(errorCode
== U_FILE_ACCESS_ERROR
) {
757 fprintf(stderr
, "couldn't open file %s\n", openFileName
.data());
760 if (ucbuf
.isNull() || U_FAILURE(errorCode
)) {
761 fprintf(stderr
, "An error occurred processing file %s. Error: %s\n", openFileName
.data(), u_errorName(errorCode
));
765 /* Parse the data into an SRBRoot */
766 LocalPointer
<SRBRoot
> data(
767 parse(ucbuf
.getAlias(), inputDir
, outputDir
, filename
.data(), FALSE
, FALSE
, &errorCode
));
768 if (U_FAILURE(errorCode
)) {
772 struct SResource
*root
= data
->fRoot
;
773 struct SResource
*collations
= resLookup(root
, "collations");
774 if (collations
!= NULL
) {
775 struct SResource
*collation
= resLookup(collations
, collationType
);
776 if (collation
!= NULL
) {
777 struct SResource
*sequence
= resLookup(collation
, "Sequence");
778 if (sequence
!= NULL
&& sequence
->isString()) {
779 // No string pointer aliasing so that we need not hold onto the resource bundle.
780 StringResource
*sr
= static_cast<StringResource
*>(sequence
);
787 // Quick-and-dirty escaping function.
788 // Assumes that we are on an ASCII-based platform.
790 escape(const UChar
*s
, char *buffer
) {
791 int32_t length
= u_strlen(s
);
795 U16_NEXT(s
, i
, length
, c
);
799 } else if (0x20 <= c
&& c
<= 0x7e) {
801 *buffer
++ = (char)c
; // assumes ASCII-based platform
803 buffer
+= sprintf(buffer
, "\\u%04X", (int)c
);
810 #endif // !UCONFIG_NO_COLLATION
812 static TableResource
*
813 addCollation(ParseState
* state
, TableResource
*result
, const char *collationType
,
814 uint32_t startline
, UErrorCode
*status
)
816 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
817 struct SResource
*member
= NULL
;
818 struct UString
*tokenValue
;
819 struct UString comment
;
820 enum ETokenType token
;
823 UBool haveRules
= FALSE
;
824 UVersionInfo version
;
827 /* '{' . (name resource)* '}' */
828 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
833 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
835 if (token
== TOK_CLOSE_BRACE
)
840 if (token
!= TOK_STRING
)
843 *status
= U_INVALID_FORMAT_ERROR
;
845 if (token
== TOK_EOF
)
847 error(startline
, "unterminated table");
851 error(line
, "Unexpected token %s", tokenNames
[token
]);
857 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
859 if (U_FAILURE(*status
))
865 member
= parseResource(state
, subtag
, NULL
, status
);
867 if (U_FAILURE(*status
))
874 // Ignore the parsed resources, continue parsing.
876 else if (uprv_strcmp(subtag
, "Version") == 0 && member
->isString())
878 StringResource
*sr
= static_cast<StringResource
*>(member
);
880 int32_t length
= sr
->length();
882 if (length
>= UPRV_LENGTHOF(ver
))
884 length
= UPRV_LENGTHOF(ver
) - 1;
887 sr
->fString
.extract(0, length
, ver
, UPRV_LENGTHOF(ver
), US_INV
);
888 u_versionFromString(version
, ver
);
890 result
->add(member
, line
, *status
);
893 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
895 /* discard duplicate %%CollationBin if any*/
897 else if (uprv_strcmp(subtag
, "Sequence") == 0 && member
->isString())
899 StringResource
*sr
= static_cast<StringResource
*>(member
);
902 // Defer building the collator until we have seen
903 // all sub-elements of the collation table, including the Version.
904 /* in order to achieve smaller data files, we can direct genrb */
905 /* to omit collation rules */
906 if(!state
->omitCollationRules
) {
907 result
->add(member
, line
, *status
);
911 else // Just copy non-special items.
913 result
->add(member
, line
, *status
);
916 res_close(member
); // TODO: use LocalPointer
917 if (U_FAILURE(*status
))
924 if (!haveRules
) { return result
; }
926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
927 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
930 // CLDR ticket #3949, ICU ticket #8082:
931 // Do not build collation binary data for for-import-only "private" collation rule strings.
932 if (uprv_strncmp(collationType
, "private-", 8) == 0) {
934 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
939 if(!state
->makeBinaryCollation
) {
941 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
945 UErrorCode intStatus
= U_ZERO_ERROR
;
946 UParseError parseError
;
947 uprv_memset(&parseError
, 0, sizeof(parseError
));
948 GenrbImporter
importer(state
->inputdir
, state
->outputdir
);
949 const icu::CollationTailoring
*base
= icu::CollationRoot::getRoot(intStatus
);
950 if(U_FAILURE(intStatus
)) {
951 error(line
, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus
));
953 return NULL
; // TODO: use LocalUResourceBundlePointer for result
955 icu::CollationBuilder
builder(base
, intStatus
);
956 if(uprv_strncmp(collationType
, "search", 6) == 0) {
957 builder
.disableFastLatin(); // build fast-Latin table unless search collator
959 LocalPointer
<icu::CollationTailoring
> t(
960 builder
.parseAndBuild(rules
, version
, &importer
, &parseError
, intStatus
));
961 if(U_FAILURE(intStatus
)) {
962 const char *reason
= builder
.getErrorReason();
963 if(reason
== NULL
) { reason
= ""; }
964 error(line
, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
965 state
->filename
, collationType
,
966 (long)parseError
.offset
, u_errorName(intStatus
), reason
);
967 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
968 // Print pre- and post-context.
969 char preBuffer
[100], postBuffer
[100];
970 escape(parseError
.preContext
, preBuffer
);
971 escape(parseError
.postContext
, postBuffer
);
972 error(line
, " error context: \"...%s\" ! \"%s...\"", preBuffer
, postBuffer
);
974 if(isStrict() || t
.isNull()) {
980 icu::LocalMemory
<uint8_t> buffer
;
981 int32_t capacity
= 100000;
982 uint8_t *dest
= buffer
.allocateInsteadAndCopy(capacity
);
984 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
986 *status
= U_MEMORY_ALLOCATION_ERROR
;
990 int32_t indexes
[icu::CollationDataReader::IX_TOTAL_SIZE
+ 1];
991 int32_t totalSize
= icu::CollationDataWriter::writeTailoring(
992 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
993 if(intStatus
== U_BUFFER_OVERFLOW_ERROR
) {
994 intStatus
= U_ZERO_ERROR
;
995 capacity
= totalSize
;
996 dest
= buffer
.allocateInsteadAndCopy(capacity
);
998 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
1000 *status
= U_MEMORY_ALLOCATION_ERROR
;
1004 totalSize
= icu::CollationDataWriter::writeTailoring(
1005 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
1007 if(U_FAILURE(intStatus
)) {
1008 fprintf(stderr
, "CollationDataWriter::writeTailoring() failed: %s\n",
1009 u_errorName(intStatus
));
1014 printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
);
1015 icu::CollationInfo::printSizes(totalSize
, indexes
);
1016 if(t
->settings
->hasReordering()) {
1017 printf("%s~%s collation reordering ranges:\n", state
->filename
, collationType
);
1018 icu::CollationInfo::printReorderRanges(
1019 *t
->data
, t
->settings
->reorderCodes
, t
->settings
->reorderCodesLength
);
1021 #if 0 // debugging output
1023 printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
);
1024 icu::CollationInfo::printSizes(totalSize
, indexes
);
1027 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", totalSize
, dest
, NULL
, NULL
, status
);
1028 result
->add(collationBin
, line
, *status
);
1029 if (U_FAILURE(*status
)) {
1038 keepCollationType(const char * /*type*/) {
1042 static struct SResource
*
1043 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1045 TableResource
*result
= NULL
;
1046 struct SResource
*member
= NULL
;
1047 struct UString
*tokenValue
;
1048 struct UString comment
;
1049 enum ETokenType token
;
1050 char subtag
[1024], typeKeyword
[1024];
1053 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1055 if (result
== NULL
|| U_FAILURE(*status
))
1060 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1063 return addCollation(state
, result
, "(no type)", startline
, status
);
1067 ustr_init(&comment
);
1068 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1070 if (token
== TOK_CLOSE_BRACE
)
1075 if (token
!= TOK_STRING
)
1078 *status
= U_INVALID_FORMAT_ERROR
;
1080 if (token
== TOK_EOF
)
1082 error(startline
, "unterminated table");
1086 error(line
, "Unexpected token %s", tokenNames
[token
]);
1092 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1094 if (U_FAILURE(*status
))
1100 if (uprv_strcmp(subtag
, "default") == 0)
1102 member
= parseResource(state
, subtag
, NULL
, status
);
1104 if (U_FAILURE(*status
))
1110 result
->add(member
, line
, *status
);
1114 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1115 /* this probably needs to be refactored or recursively use the parser */
1116 /* first we assume that our collation table won't have the explicit type */
1117 /* then, we cannot handle aliases */
1118 if(token
== TOK_OPEN_BRACE
) {
1119 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1120 TableResource
*collationRes
;
1121 if (keepCollationType(subtag
)) {
1122 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1124 collationRes
= NULL
;
1126 // need to parse the collation data regardless
1127 collationRes
= addCollation(state
, collationRes
, subtag
, startline
, status
);
1128 if (collationRes
!= NULL
) {
1129 result
->add(collationRes
, startline
, *status
);
1131 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1132 /* we could have a table too */
1133 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1134 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1135 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1136 member
= parseResource(state
, subtag
, NULL
, status
);
1137 if (U_FAILURE(*status
))
1143 result
->add(member
, line
, *status
);
1146 *status
= U_INVALID_FORMAT_ERROR
;
1151 *status
= U_INVALID_FORMAT_ERROR
;
1156 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1158 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1160 if (U_FAILURE(*status
))
1169 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1170 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1171 static struct SResource
*
1172 realParseTable(ParseState
* state
, TableResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1174 struct SResource
*member
= NULL
;
1175 struct UString
*tokenValue
=NULL
;
1176 struct UString comment
;
1177 enum ETokenType token
;
1180 UBool readToken
= FALSE
;
1182 /* '{' . (name resource)* '}' */
1185 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1189 ustr_init(&comment
);
1190 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1192 if (token
== TOK_CLOSE_BRACE
)
1195 warning(startline
, "Encountered empty table");
1200 if (token
!= TOK_STRING
)
1202 *status
= U_INVALID_FORMAT_ERROR
;
1204 if (token
== TOK_EOF
)
1206 error(startline
, "unterminated table");
1210 error(line
, "unexpected token %s", tokenNames
[token
]);
1216 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
1217 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1219 *status
= U_INVALID_FORMAT_ERROR
;
1220 error(line
, "invariant characters required for table keys");
1224 if (U_FAILURE(*status
))
1226 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
1230 member
= parseResource(state
, subtag
, &comment
, status
);
1232 if (member
== NULL
|| U_FAILURE(*status
))
1234 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1238 table
->add(member
, line
, *status
);
1240 if (U_FAILURE(*status
))
1242 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1246 ustr_deinit(&comment
);
1250 /* A compiler warning will appear if all paths don't contain a return statement. */
1251 /* *status = U_INTERNAL_PROGRAM_ERROR;
1255 static struct SResource
*
1256 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1258 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1260 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1262 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1264 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1267 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1270 TableResource
*result
= table_open(state
->bundle
, tag
, comment
, status
);
1272 if (result
== NULL
|| U_FAILURE(*status
))
1276 return realParseTable(state
, result
, tag
, startline
, status
);
1279 static struct SResource
*
1280 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1282 struct SResource
*member
= NULL
;
1283 struct UString
*tokenValue
;
1284 struct UString memberComments
;
1285 enum ETokenType token
;
1286 UBool readToken
= FALSE
;
1288 ArrayResource
*result
= array_open(state
->bundle
, tag
, comment
, status
);
1290 if (result
== NULL
|| U_FAILURE(*status
))
1295 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1298 ustr_init(&memberComments
);
1300 /* '{' . resource [','] '}' */
1304 ustr_setlen(&memberComments
, 0, status
);
1306 /* check for end of array, but don't consume next token unless it really is the end */
1307 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1310 if (token
== TOK_CLOSE_BRACE
)
1312 getToken(state
, NULL
, NULL
, NULL
, status
);
1314 warning(startline
, "Encountered empty array");
1319 if (token
== TOK_EOF
)
1322 *status
= U_INVALID_FORMAT_ERROR
;
1323 error(startline
, "unterminated array");
1327 /* string arrays are a special case */
1328 if (token
== TOK_STRING
)
1330 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
1331 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1335 member
= parseResource(state
, NULL
, &memberComments
, status
);
1338 if (member
== NULL
|| U_FAILURE(*status
))
1344 result
->add(member
);
1346 /* eat optional comma if present */
1347 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1349 if (token
== TOK_COMMA
)
1351 getToken(state
, NULL
, NULL
, NULL
, status
);
1354 if (U_FAILURE(*status
))
1362 ustr_deinit(&memberComments
);
1366 static struct SResource
*
1367 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1369 enum ETokenType token
;
1372 UBool readToken
= FALSE
;
1374 struct UString memberComments
;
1376 IntVectorResource
*result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1378 if (result
== NULL
|| U_FAILURE(*status
))
1384 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1386 ustr_init(&memberComments
);
1387 /* '{' . string [','] '}' */
1390 ustr_setlen(&memberComments
, 0, status
);
1392 /* check for end of array, but don't consume next token unless it really is the end */
1393 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1395 if (token
== TOK_CLOSE_BRACE
)
1397 /* it's the end, consume the close brace */
1398 getToken(state
, NULL
, NULL
, NULL
, status
);
1400 warning(startline
, "Encountered empty int vector");
1402 ustr_deinit(&memberComments
);
1406 int32_t stringLength
;
1407 string
= getInvariantString(state
, NULL
, NULL
, stringLength
, status
);
1409 if (U_FAILURE(*status
))
1415 /* For handling illegal char in the Intvector */
1416 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1417 int32_t len
= (int32_t)(stopstring
-string
);
1419 if(len
==stringLength
)
1421 result
->add(value
, *status
);
1423 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1428 *status
=U_INVALID_CHAR_FOUND
;
1431 if (U_FAILURE(*status
))
1437 /* the comma is optional (even though it is required to prevent the reader from concatenating
1438 consecutive entries) so that a missing comma on the last entry isn't an error */
1439 if (token
== TOK_COMMA
)
1441 getToken(state
, NULL
, NULL
, NULL
, status
);
1447 /* A compiler warning will appear if all paths don't contain a return statement. */
1448 /* intvector_close(result, status);
1449 *status = U_INTERNAL_PROGRAM_ERROR;
1453 static struct SResource
*
1454 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1457 int32_t stringLength
;
1458 LocalMemory
<char> string(getInvariantString(state
, &line
, NULL
, stringLength
, status
));
1459 if (string
.isNull() || U_FAILURE(*status
))
1464 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1465 if (U_FAILURE(*status
))
1471 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1474 LocalMemory
<uint8_t> value
;
1476 if (stringLength
> 0 && value
.allocateInsteadAndCopy(stringLength
) == NULL
)
1478 *status
= U_MEMORY_ALLOCATION_ERROR
;
1482 char toConv
[3] = {'\0', '\0', '\0'};
1483 for (int32_t i
= 0; i
< stringLength
;)
1485 // Skip spaces (which may have been line endings).
1486 char c0
= string
[i
++];
1487 if (c0
== ' ') { continue; }
1488 if (i
== stringLength
) {
1489 *status
=U_INVALID_CHAR_FOUND
;
1490 error(line
, "Encountered invalid binary value (odd number of hex digits)");
1494 toConv
[1] = string
[i
++];
1497 value
[count
++] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1498 uint32_t len
=(uint32_t)(stopstring
-toConv
);
1502 *status
=U_INVALID_CHAR_FOUND
;
1503 error(line
, "Encountered invalid binary value (not all pairs of hex digits)");
1509 warning(startline
, "Encountered empty binary value");
1510 return bin_open(state
->bundle
, tag
, 0, NULL
, "", comment
, status
);
1512 return bin_open(state
->bundle
, tag
, count
, value
.getAlias(), NULL
, comment
, status
);
1516 static struct SResource
*
1517 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1519 struct SResource
*result
= NULL
;
1524 int32_t stringLength
;
1525 string
= getInvariantString(state
, NULL
, NULL
, stringLength
, status
);
1527 if (string
== NULL
|| U_FAILURE(*status
))
1532 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1534 if (U_FAILURE(*status
))
1541 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1544 if (stringLength
== 0)
1546 warning(startline
, "Encountered empty integer. Default value is 0.");
1549 /* Allow integer support for hexdecimal, octal digit and decimal*/
1550 /* and handle illegal char in the integer*/
1551 value
= uprv_strtoul(string
, &stopstring
, 0);
1552 int32_t len
= (int32_t)(stopstring
-string
);
1553 if(len
==stringLength
)
1555 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1559 *status
=U_INVALID_CHAR_FOUND
;
1566 static struct SResource
*
1567 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1570 int32_t stringLength
;
1571 LocalMemory
<char> filename(getInvariantString(state
, &line
, NULL
, stringLength
, status
));
1572 if (U_FAILURE(*status
))
1577 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1579 if (U_FAILURE(*status
))
1585 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1588 /* Open the input file for reading */
1589 CharString fullname
;
1590 if (state
->inputdir
!= NULL
) {
1591 fullname
.append(state
->inputdir
, *status
);
1593 fullname
.appendPathPart(filename
.getAlias(), *status
);
1594 if (U_FAILURE(*status
)) {
1598 FileStream
*file
= T_FileStream_open(fullname
.data(), "rb");
1601 error(line
, "couldn't open input file %s", filename
.getAlias());
1602 *status
= U_FILE_ACCESS_ERROR
;
1606 int32_t len
= T_FileStream_size(file
);
1607 LocalMemory
<uint8_t> data
;
1608 if(data
.allocateInsteadAndCopy(len
) == NULL
)
1610 *status
= U_MEMORY_ALLOCATION_ERROR
;
1611 T_FileStream_close (file
);
1615 /* int32_t numRead = */ T_FileStream_read(file
, data
.getAlias(), len
);
1616 T_FileStream_close (file
);
1618 return bin_open(state
->bundle
, tag
, len
, data
.getAlias(), fullname
.data(), comment
, status
);
1621 static struct SResource
*
1622 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1624 struct SResource
*result
;
1628 UChar
*pTarget
= NULL
;
1631 char *fullname
= NULL
;
1632 const char* cp
= NULL
;
1633 const UChar
* uBuffer
= NULL
;
1635 int32_t stringLength
;
1636 filename
= getInvariantString(state
, &line
, NULL
, stringLength
, status
);
1638 if (U_FAILURE(*status
))
1643 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1645 if (U_FAILURE(*status
))
1647 uprv_free(filename
);
1652 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1655 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ stringLength
+ 2);
1657 if(fullname
== NULL
)
1659 *status
= U_MEMORY_ALLOCATION_ERROR
;
1660 uprv_free(filename
);
1664 if(state
->inputdir
!=NULL
){
1665 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1668 uprv_strcpy(fullname
, state
->inputdir
);
1670 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1671 fullname
[state
->inputdirLength
+ 1] = '\0';
1673 uprv_strcat(fullname
, filename
);
1677 uprv_strcpy(fullname
, state
->inputdir
);
1678 uprv_strcat(fullname
, filename
);
1681 uprv_strcpy(fullname
,filename
);
1684 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1686 if (U_FAILURE(*status
)) {
1687 error(line
, "couldn't open input file %s\n", filename
);
1691 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1692 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
1698 uprv_free(filename
);
1699 uprv_free(fullname
);
1708 U_STRING_DECL(k_type_string
, "string", 6);
1709 U_STRING_DECL(k_type_binary
, "binary", 6);
1710 U_STRING_DECL(k_type_bin
, "bin", 3);
1711 U_STRING_DECL(k_type_table
, "table", 5);
1712 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1713 U_STRING_DECL(k_type_int
, "int", 3);
1714 U_STRING_DECL(k_type_integer
, "integer", 7);
1715 U_STRING_DECL(k_type_array
, "array", 5);
1716 U_STRING_DECL(k_type_alias
, "alias", 5);
1717 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1718 U_STRING_DECL(k_type_import
, "import", 6);
1719 U_STRING_DECL(k_type_include
, "include", 7);
1721 /* Various non-standard processing plugins that create one or more special resources. */
1722 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1723 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1724 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1725 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/*
 * Resource types known to the parser.  The values index gResourceTypes[],
 * so the enumerators must stay in the same order as the rows of that table.
 * RESTYPE_UNKNOWN and RESTYPE_RESERVED bracket the real types.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED
} EResourceType;
1748 const char *nameChars
; /* only used for debugging */
1749 const UChar
*nameUChars
;
1750 ParseResourceFunction
*parseFunction
;
1751 } gResourceTypes
[] = {
1752 {"Unknown", NULL
, NULL
},
1753 {"string", k_type_string
, parseString
},
1754 {"binary", k_type_binary
, parseBinary
},
1755 {"table", k_type_table
, parseTable
},
1756 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1757 {"integer", k_type_integer
, parseInteger
},
1758 {"array", k_type_array
, parseArray
},
1759 {"alias", k_type_alias
, parseAlias
},
1760 {"intvector", k_type_intvector
, parseIntVector
},
1761 {"import", k_type_import
, parseImport
},
1762 {"include", k_type_include
, parseInclude
},
1763 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1764 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1765 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1766 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
1767 {"reserved", NULL
, NULL
}
1772 U_STRING_INIT(k_type_string
, "string", 6);
1773 U_STRING_INIT(k_type_binary
, "binary", 6);
1774 U_STRING_INIT(k_type_bin
, "bin", 3);
1775 U_STRING_INIT(k_type_table
, "table", 5);
1776 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1777 U_STRING_INIT(k_type_int
, "int", 3);
1778 U_STRING_INIT(k_type_integer
, "integer", 7);
1779 U_STRING_INIT(k_type_array
, "array", 5);
1780 U_STRING_INIT(k_type_alias
, "alias", 5);
1781 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1782 U_STRING_INIT(k_type_import
, "import", 6);
1783 U_STRING_INIT(k_type_include
, "include", 7);
1785 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1786 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1787 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1788 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
1791 static inline UBool
isTable(enum EResourceType type
) {
1792 return (UBool
)(type
==RESTYPE_TABLE
|| type
==RESTYPE_TABLE_NO_FALLBACK
);
1795 static enum EResourceType
1796 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1798 struct UString
*tokenValue
;
1799 struct UString comment
;
1800 enum EResourceType result
= RESTYPE_UNKNOWN
;
1802 ustr_init(&comment
);
1803 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1805 if (U_FAILURE(*status
))
1807 return RESTYPE_UNKNOWN
;
1810 *status
= U_ZERO_ERROR
;
1812 /* Search for normal types */
1813 result
=RESTYPE_UNKNOWN
;
1814 while ((result
=(EResourceType
)(result
+1)) < RESTYPE_RESERVED
) {
1815 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1819 /* Now search for the aliases */
1820 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1821 result
= RESTYPE_INTEGER
;
1823 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1824 result
= RESTYPE_BINARY
;
1826 else if (result
== RESTYPE_RESERVED
) {
1827 char tokenBuffer
[1024];
1828 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
1829 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1830 *status
= U_INVALID_FORMAT_ERROR
;
1831 error(line
, "unknown resource type '%s'", tokenBuffer
);
1837 /* parse a non-top-level resource */
1838 static struct SResource
*
1839 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1841 enum ETokenType token
;
1842 enum EResourceType resType
= RESTYPE_UNKNOWN
;
1843 ParseResourceFunction
*parseFunction
= NULL
;
1844 struct UString
*tokenValue
;
1849 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1852 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1855 /* name . [ ':' type ] '{' resource '}' */
1856 /* This function parses from the colon onwards. If the colon is present, parse the
1857 type then try to parse a resource of that type. If there is no explicit type,
1858 work it out using the lookahead tokens. */
1862 *status
= U_INVALID_FORMAT_ERROR
;
1863 error(startline
, "Unexpected EOF encountered");
1867 *status
= U_INVALID_FORMAT_ERROR
;
1871 resType
= parseResourceType(state
, status
);
1872 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
1874 if (U_FAILURE(*status
))
1881 case TOK_OPEN_BRACE
:
1885 *status
= U_INVALID_FORMAT_ERROR
;
1886 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
1891 if (resType
== RESTYPE_UNKNOWN
)
1893 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1894 We could have any of the following:
1895 { { => array (nested)
1897 { string , => string array
1901 { string :/{ => table
1902 { string } => string
1905 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
1907 if (U_FAILURE(*status
))
1912 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
1914 resType
= RESTYPE_ARRAY
;
1916 else if (token
== TOK_STRING
)
1918 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
1920 if (U_FAILURE(*status
))
1927 case TOK_COMMA
: resType
= RESTYPE_ARRAY
; break;
1928 case TOK_OPEN_BRACE
: resType
= RESTYPE_TABLE
; break;
1929 case TOK_CLOSE_BRACE
: resType
= RESTYPE_STRING
; break;
1930 case TOK_COLON
: resType
= RESTYPE_TABLE
; break;
1932 *status
= U_INVALID_FORMAT_ERROR
;
1933 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
1939 *status
= U_INVALID_FORMAT_ERROR
;
1940 error(line
, "Unexpected token after '{'");
1944 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1945 } else if(resType
== RESTYPE_TABLE_NO_FALLBACK
) {
1946 *status
= U_INVALID_FORMAT_ERROR
;
1947 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
1952 /* We should now know what we need to parse next, so call the appropriate parser
1953 function and return. */
1954 parseFunction
= gResourceTypes
[resType
].parseFunction
;
1955 if (parseFunction
!= NULL
) {
1956 return parseFunction(state
, tag
, startline
, comment
, status
);
1959 *status
= U_INTERNAL_PROGRAM_ERROR
;
1960 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
1966 /* parse the top-level resource */
1968 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, const char *filename
,
1969 UBool makeBinaryCollation
, UBool omitCollationRules
, UErrorCode
*status
)
1971 struct UString
*tokenValue
;
1972 struct UString comment
;
1974 enum EResourceType bundleType
;
1975 enum ETokenType token
;
1980 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
1982 ustr_init(&state
.lookahead
[i
].value
);
1983 ustr_init(&state
.lookahead
[i
].comment
);
1986 initLookahead(&state
, buf
, status
);
1988 state
.inputdir
= inputDir
;
1989 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
1990 state
.outputdir
= outputDir
;
1991 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
1992 state
.filename
= filename
;
1993 state
.makeBinaryCollation
= makeBinaryCollation
;
1994 state
.omitCollationRules
= omitCollationRules
;
1996 ustr_init(&comment
);
1997 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
1999 state
.bundle
= new SRBRoot(&comment
, FALSE
, *status
);
2001 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2003 delete state
.bundle
;
2009 state
.bundle
->setLocale(tokenValue
->fChars
, *status
);
2011 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2012 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2013 if(token
==TOK_COLON
) {
2014 *status
=U_ZERO_ERROR
;
2015 bundleType
=parseResourceType(&state
, status
);
2017 if(isTable(bundleType
))
2019 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2023 *status
=U_PARSE_ERROR
;
2024 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2030 if(token
==TOK_OPEN_BRACE
)
2032 *status
=U_ZERO_ERROR
;
2033 bundleType
=RESTYPE_TABLE
;
2037 /* neither colon nor open brace */
2038 *status
=U_PARSE_ERROR
;
2039 bundleType
=RESTYPE_UNKNOWN
;
2040 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2044 if (U_FAILURE(*status
))
2046 delete state
.bundle
;
2050 if(bundleType
==RESTYPE_TABLE_NO_FALLBACK
) {
2052 * Parse a top-level table with the table(nofallback) declaration.
2053 * This is the same as a regular table, but also sets the
2054 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2056 state
.bundle
->fNoFallback
=TRUE
;
2058 /* top-level tables need not handle special table names like "collations" */
2059 assert(!state
.bundle
->fIsPoolBundle
);
2060 assert(state
.bundle
->fRoot
->fType
== URES_TABLE
);
2061 TableResource
*rootTable
= static_cast<TableResource
*>(state
.bundle
->fRoot
);
2062 realParseTable(&state
, rootTable
, NULL
, line
, status
);
2063 if(dependencyArray
!=NULL
){
2064 rootTable
->add(dependencyArray
, 0, *status
);
2065 dependencyArray
= NULL
;
2067 if (U_FAILURE(*status
))
2069 delete state
.bundle
;
2070 res_close(dependencyArray
);
2074 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2076 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2078 *status
= U_INVALID_FORMAT_ERROR
;
2083 cleanupLookahead(&state
);
2084 ustr_deinit(&comment
);
2085 return state
.bundle
;