2 *******************************************************************************
4 * Copyright (C) 1998-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 # define UNISTR_FROM_CHAR_EXPLICIT explicit
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 # define UNISTR_FROM_STRING_EXPLICIT explicit
42 #include "unicode/ustring.h"
43 #include "unicode/uscript.h"
44 #include "unicode/utf16.h"
45 #include "unicode/putil.h"
46 #include "collationbuilder.h"
47 #include "collationdata.h"
48 #include "collationdatareader.h"
49 #include "collationdatawriter.h"
50 #include "collationfastlatinbuilder.h"
51 #include "collationinfo.h"
52 #include "collationroot.h"
53 #include "collationruleparser.h"
54 #include "collationtailoring.h"
57 /* Number of tokens to read ahead of the current stream position */
58 #define MAX_LOOKAHEAD 3
// Bracket code points used when reading rule files: STARTCOMMAND and
// OPENSQBRACKET are both 0x005B ('['), ENDCOMMAND and CLOSESQBRACKET are
// both 0x005D (']').
68 #define STARTCOMMAND 0x005B
69 #define ENDCOMMAND 0x005D
70 #define OPENSQBRACKET 0x005B
71 #define CLOSESQBRACKET 0x005D
73 using icu::LocalPointer
;
74 using icu::UnicodeString
;
80 struct UString comment
;
84 /* keep in sync with token defines in read.h */
// Printable token names, indexed by token type; used in the
// "expecting %s, got %s" and "Unexpected token %s" diagnostics in this file.
85 const char *tokenNames
[TOK_TOKEN_COUNT
] =
87 "string", /* A string token, such as "MonthNames" */
88 "'{'", /* An opening brace character */
89 "'}'", /* A closing brace character */
93 "<end of file>", /* End of the file has been reached successfully */
97 /* Just to store "TRUE" */
98 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
// The members below are the ones the functions in this file reach through
// a ParseState pointer (state->lookahead, state->bundle, ...). The enclosing
// struct declaration itself is not visible in this excerpt.
// lookahead has MAX_LOOKAHEAD + 1 slots; lookaheadPosition indexes the
// current slot.
101 struct Lookahead lookahead
[MAX_LOOKAHEAD
+ 1];
102 uint32_t lookaheadPosition
;
104 struct SRBRoot
*bundle
;
105 const char *inputdir
;
106 uint32_t inputdirLength
;
107 const char *outputdir
;
108 uint32_t outputdirLength
;
109 const char *filename
;
110 UBool makeBinaryCollation
;
111 UBool omitCollationRules
;
// Function type with the parameter list used by the per-type parse routines
// in this file (e.g. parseString, parseAlias, parseTable, parseArray).
114 typedef struct SResource
*
115 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
);
// Forward declaration: parseResource is called by the table, array and
// collation parsers before its definition appears.
117 static struct SResource
*parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
);
119 /* The nature of the lookahead buffer:
120 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
121 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
122 When getToken is called, the current pointer is moved to the next slot and the
123 old slot is filled with the next token from the reader by calling getNextToken.
124 The token values are stored in the slot, which means that token values don't
125 survive a call to getToken, ie.
129 getToken(&value, NULL, status);
130 getToken(NULL, NULL, status); bad - value is now a different string
// initLookahead: primes the lookahead buffer of `state`.
// Resets state->lookaheadPosition to 0 and fills slots 0 .. MAX_LOOKAHEAD - 1
// by calling getNextToken on state->buffer (type, value, line and comment of
// each slot). The `buf` parameter is not referenced in the visible lines.
// NOTE(review): the bodies of the initTypeStrings check and of the
// U_FAILURE branch are not visible in this excerpt.
133 initLookahead(ParseState
* state
, UCHARBUF
*buf
, UErrorCode
*status
)
135 static uint32_t initTypeStrings
= 0;
138 if (!initTypeStrings
)
143 state
->lookaheadPosition
= 0;
// Only MAX_LOOKAHEAD of the MAX_LOOKAHEAD + 1 slots are filled here.
148 for (i
= 0; i
< MAX_LOOKAHEAD
; i
++)
150 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
151 if (U_FAILURE(*status
))
// NOTE(review): status is reset to U_ZERO_ERROR here; confirm this cannot
// hide an error reported by getNextToken above.
157 *status
= U_ZERO_ERROR
;
// cleanupLookahead: calls ustr_deinit on the value and comment strings of
// every lookahead slot (indices 0 .. MAX_LOOKAHEAD inclusive, i.e. all
// MAX_LOOKAHEAD + 1 slots).
161 cleanupLookahead(ParseState
* state
)
164 for (i
= 0; i
<= MAX_LOOKAHEAD
; i
++)
166 ustr_deinit(&state
->lookahead
[i
].value
);
167 ustr_deinit(&state
->lookahead
[i
].comment
);
// getToken: consumes the current token from the lookahead buffer.
//   tokenValue - if non-NULL, receives a pointer INTO the current slot's
//                value string (not a copy).
//   comment    - receives a copy of the current slot's comment via ustr_cpy.
//                NOTE(review): the guard around the copy is not visible in
//                this excerpt, yet callers in this file pass NULL.
//   linenumber - if non-NULL, receives the current slot's line number.
//   status     - passed to ustr_cpy, ustr_setlen and getNextToken.
// `result` is loaded with the current slot's token type; the return
// statement itself is not visible in this excerpt.
172 static enum ETokenType
173 getToken(ParseState
* state
, struct UString
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode
*status
)
175 enum ETokenType result
;
178 result
= state
->lookahead
[state
->lookaheadPosition
].type
;
180 if (tokenValue
!= NULL
)
182 *tokenValue
= &state
->lookahead
[state
->lookaheadPosition
].value
;
185 if (linenumber
!= NULL
)
187 *linenumber
= state
->lookahead
[state
->lookaheadPosition
].line
;
192 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
// i is the slot just behind the current one (position - 1 modulo
// MAX_LOOKAHEAD + 1). The current position then advances by one, and slot i
// is emptied and refilled with the next token from the reader. The slot
// handed out through tokenValue above is therefore overwritten by the NEXT
// call to getToken, not by this one.
195 i
= (state
->lookaheadPosition
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD
+ 1);
196 state
->lookaheadPosition
= (state
->lookaheadPosition
+ 1) % (MAX_LOOKAHEAD
+ 1);
197 ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
);
198 ustr_setlen(&state
->lookahead
[i
].value
, 0, status
);
199 state
->lookahead
[i
].type
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
);
201 /* printf("getToken, returning %s\n", tokenNames[result]); */
// peekToken: inspects a token without consuming it.
//   lookaheadCount - how many tokens ahead of the current one to look
//                    (0 = current token). Values >= MAX_LOOKAHEAD set
//                    *status to U_INTERNAL_PROGRAM_ERROR.
//   tokenValue     - if non-NULL, receives a pointer into the peeked slot's
//                    value string.
//   linenumber     - if non-NULL, receives the peeked slot's line number.
//   comment        - receives a copy of a slot comment (see note below).
// Returns the token type stored in the peeked slot.
// What the U_FAILURE branch and the range-check branch return is not visible
// in this excerpt.
206 static enum ETokenType
207 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString
**tokenValue
, uint32_t *linenumber
, struct UString
*comment
, UErrorCode
*status
)
// Index of the peeked slot in the circular buffer.
209 uint32_t i
= (state
->lookaheadPosition
+ lookaheadCount
) % (MAX_LOOKAHEAD
+ 1);
211 if (U_FAILURE(*status
))
216 if (lookaheadCount
>= MAX_LOOKAHEAD
)
218 *status
= U_INTERNAL_PROGRAM_ERROR
;
222 if (tokenValue
!= NULL
)
224 *tokenValue
= &state
->lookahead
[i
].value
;
227 if (linenumber
!= NULL
)
229 *linenumber
= state
->lookahead
[i
].line
;
// NOTE(review): the comment is copied from the CURRENT slot
// (lookaheadPosition), not from slot i as value, line and type are. For
// lookaheadCount > 0 the caller gets the comment of a different token than
// the one peeked. Confirm whether this is intended.
233 ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
);
236 return state
->lookahead
[i
].type
;
// expect: consumes the next token with getToken and checks its type.
//   expectedToken - the token type that must come next.
//   tokenValue, comment - forwarded unchanged to getToken.
//   linenumber    - optional out-parameter; the body of its NULL check is
//                   not visible in this excerpt.
// On a type mismatch, *status is set to U_INVALID_FORMAT_ERROR and an
// "expecting %s, got %s" error is reported with the names from tokenNames.
240 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString
**tokenValue
, struct UString
*comment
, uint32_t *linenumber
, UErrorCode
*status
)
244 enum ETokenType token
= getToken(state
, tokenValue
, comment
, &line
, status
);
246 if (linenumber
!= NULL
)
251 if (U_FAILURE(*status
))
256 if (token
!= expectedToken
)
258 *status
= U_INVALID_FORMAT_ERROR
;
259 error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]);
// NOTE(review): the branch structure around this reset is not visible here;
// presumably it is the success path - confirm.
263 *status
= U_ZERO_ERROR
;
// getInvariantString: reads a TOK_STRING token and converts it to a newly
// allocated char string.
//   line    - forwarded to expect(); also dereferenced for the error report.
//   comment - forwarded to expect().
//   status  - set to U_INVALID_FORMAT_ERROR when the token contains
//             non-invariant characters, and to U_MEMORY_ALLOCATION_ERROR in
//             the allocation-failure path.
// The result buffer comes from uprv_malloc(count + 1); presumably the caller
// releases it - confirm against callers. Return statements are not visible
// in this excerpt.
267 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString
*comment
, UErrorCode
*status
)
269 struct UString
*tokenValue
;
273 expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
);
275 if (U_FAILURE(*status
))
// Length is measured with u_strlen, i.e. up to the first NUL code unit.
280 count
= u_strlen(tokenValue
->fChars
);
281 if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) {
282 *status
= U_INVALID_FORMAT_ERROR
;
283 error(*line
, "invariant characters required for table keys, binary data, etc.");
287 result
= static_cast<char *>(uprv_malloc(count
+1));
291 *status
= U_MEMORY_ALLOCATION_ERROR
;
// count + 1 so that the terminating NUL is converted as well.
295 u_UCharsToChars(tokenValue
->fChars
, result
, count
+1);
// parseUCARules: reads a file name token, opens that file relative to
// state->inputdir, copies its rule text into a UChar buffer and wraps the
// buffer in a string resource created with string_open.
//   tag       - key for the resulting string resource.
//   startline - only used in the diagnostic printf.
//   (comment) - unnamed and unused.
// Filtering in the read loop, as far as visible here: text from
// STARTCOMMAND up to ENDCOMMAND is copied verbatim when not quoted; text
// from HASH up to CR or LF is read without being appended; ESCAPE sequences
// go through unescape(); unquoted SPACE, TAB, CR and LF are tested
// separately (that branch body is not visible).
// Several statements of this function are missing from this excerpt.
299 static struct SResource
*
300 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
302 struct SResource
*result
= NULL
;
303 struct UString
*tokenValue
;
// NOTE(review): file is initialized to NULL and no assignment to it is
// visible in this excerpt, yet it is passed to T_FileStream_close below.
304 FileStream
*file
= NULL
;
305 char filename
[256] = { '\0' };
306 char cs
[128] = { '\0' };
308 UBool quoted
= FALSE
;
309 UCHARBUF
*ucbuf
=NULL
;
311 const char* cp
= NULL
;
312 UChar
*pTarget
= NULL
;
313 UChar
*target
= NULL
;
314 UChar
*targetLimit
= NULL
;
317 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
320 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
323 if (U_FAILURE(*status
))
327 /* make the filename including the directory */
328 if (state
->inputdir
!= NULL
)
// NOTE(review): filename is a fixed char[256] filled with uprv_strcat; no
// length check against inputdir is visible in this excerpt.
330 uprv_strcat(filename
, state
->inputdir
);
// Adds a separator only when inputdir does not already end with one.
332 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
334 uprv_strcat(filename
, U_FILE_SEP_STRING
);
// NOTE(review): cs is a zero-initialized char[128] and fLength characters
// are converted into it (no terminator is copied by this call); no check
// that fLength < 128 is visible in this excerpt.
338 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
340 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
342 if (U_FAILURE(*status
))
346 uprv_strcat(filename
, cs
);
348 if(state
->omitCollationRules
) {
352 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
354 if (U_FAILURE(*status
)) {
355 error(line
, "An error occured while opening the input file %s\n", filename
);
359 /* We allocate more space than actually required
360 * since the actual size needed for storing UChars
361 * is not known in UTF-8 byte stream
363 size
= ucbuf_size(ucbuf
) + 1;
364 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* size
);
// NOTE(review): the uprv_malloc result is passed to uprv_memset on the very
// next statement, without a NULL check in between.
365 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
367 targetLimit
= pTarget
+size
;
369 /* read the rules into the buffer */
370 while (target
< targetLimit
)
372 c
= ucbuf_getc(ucbuf
, status
);
374 quoted
= (UBool
)!quoted
;
376 /* weiv (06/26/2002): adding the following:
377 * - preserving spaces in commands [...]
378 * - # comments until the end of line
380 if (c
== STARTCOMMAND
&& !quoted
)
383 * closing bracket will be handled by the
384 * append at the end of the loop
// NOTE(review): this inner loop and the HASH loop below stop only on their
// terminator characters; no end-of-input or targetLimit test is visible in
// this excerpt.
386 while(c
!= ENDCOMMAND
) {
387 U_APPEND_CHAR32_ONLY(c
, target
);
388 c
= ucbuf_getc(ucbuf
, status
);
391 else if (c
== HASH
&& !quoted
) {
393 while(c
!= CR
&& c
!= LF
) {
394 c
= ucbuf_getc(ucbuf
, status
);
398 else if (c
== ESCAPE
)
400 c
= unescape(ucbuf
, status
);
402 if (c
== (UChar32
)U_ERR
)
405 T_FileStream_close(file
);
409 else if (!quoted
&& (c
== SPACE
|| c
== TAB
|| c
== CR
|| c
== LF
))
411 /* ignore spaces carriage returns
412 * and line feed unless in the form \uXXXX
417 /* Append UChar * after dissembling if c > 0xffff*/
418 if (c
!= (UChar32
)U_EOF
)
420 U_APPEND_CHAR32_ONLY(c
, target
);
428 /* terminate the string */
429 if(target
< targetLimit
){
// The resource length is the number of UChars actually written.
433 result
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target
- pTarget
), NULL
, status
);
438 T_FileStream_close(file
);
// parseTransliterator: reads a file name token, opens that file relative to
// state->inputdir, passes its contents through utrans_stripRules (when
// UCONFIG_NO_TRANSLITERATION is off) and wraps the stripped text in a string
// resource created with string_open.
//   tag       - key for the resulting string resource.
//   startline - only used in the diagnostic printf.
//   (comment) - unnamed and unused.
// Several statements of this function are missing from this excerpt.
443 static struct SResource
*
444 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode
*status
)
446 struct SResource
*result
= NULL
;
447 struct UString
*tokenValue
;
// NOTE(review): file is initialized to NULL and no assignment to it is
// visible in this excerpt, yet it is passed to T_FileStream_close below.
448 FileStream
*file
= NULL
;
449 char filename
[256] = { '\0' };
450 char cs
[128] = { '\0' };
452 UCHARBUF
*ucbuf
=NULL
;
453 const char* cp
= NULL
;
454 UChar
*pTarget
= NULL
;
455 const UChar
*pSource
= NULL
;
458 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
461 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
464 if (U_FAILURE(*status
))
468 /* make the filename including the directory */
469 if (state
->inputdir
!= NULL
)
// NOTE(review): filename is a fixed char[256] filled with uprv_strcat; no
// length check against inputdir is visible in this excerpt.
471 uprv_strcat(filename
, state
->inputdir
);
// Adds a separator only when inputdir does not already end with one.
473 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
475 uprv_strcat(filename
, U_FILE_SEP_STRING
);
// NOTE(review): cs is a zero-initialized char[128] and fLength characters
// are converted into it; no check that fLength < 128 is visible here.
479 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
481 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
483 if (U_FAILURE(*status
))
487 uprv_strcat(filename
, cs
);
490 ucbuf
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
);
492 if (U_FAILURE(*status
)) {
493 error(line
, "An error occured while opening the input file %s\n", filename
);
497 /* We allocate more space than actually required
498 * since the actual size needed for storing UChars
499 * is not known in UTF-8 byte stream
501 pSource
= ucbuf_getBuffer(ucbuf
, &size
, status
);
502 pTarget
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR
* (size
+ 1));
// NOTE(review): the uprv_malloc result is passed to uprv_memset on the very
// next statement without a NULL check. Also, size + 1 elements are
// allocated but only size elements are zeroed, so the last element is not
// cleared by this call.
503 uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
);
505 #if !UCONFIG_NO_TRANSLITERATION
// size is replaced by the value utrans_stripRules returns and is then used
// as the resource length below.
506 size
= utrans_stripRules(pSource
, size
, pTarget
, status
);
509 fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
511 result
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
);
515 T_FileStream_close(file
);
// File-scope array resource that collects every dependency seen so far.
// parseDependency creates it on first use under the key "%%DEPENDENCY".
519 static struct SResource
* dependencyArray
= NULL
;
// parseDependency: reads a file name token, checks that the file exists
// under state->outputdir, and records the name twice: as a string resource
// keyed by `tag` (stored in result) and as an unkeyed element appended to
// dependencyArray.
//   startline - only used in the diagnostic printf.
//   comment   - attached to both string resources.
// Both an error() and a warning() call for a missing file are visible; the
// condition that selects between them is not visible in this excerpt.
521 static struct SResource
*
522 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
524 struct SResource
*result
= NULL
;
525 struct SResource
*elem
= NULL
;
526 struct UString
*tokenValue
;
528 char filename
[256] = { '\0' };
529 char cs
[128] = { '\0' };
531 expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
);
534 printf(" %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
537 if (U_FAILURE(*status
))
541 /* make the filename including the directory */
// Unlike parseUCARules and parseTransliterator, the path is built from
// outputdir, not inputdir.
542 if (state
->outputdir
!= NULL
)
// NOTE(review): filename is a fixed char[256] filled with uprv_strcat; no
// length check is visible in this excerpt.
544 uprv_strcat(filename
, state
->outputdir
);
546 if (state
->outputdir
[state
->outputdirLength
- 1] != U_FILE_SEP_CHAR
)
548 uprv_strcat(filename
, U_FILE_SEP_STRING
);
// NOTE(review): cs is a zero-initialized char[128]; no check that
// fLength < 128 is visible in this excerpt.
552 u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
);
554 if (U_FAILURE(*status
))
558 uprv_strcat(filename
, cs
);
559 if(!T_FileStream_file_exists(filename
)){
561 error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
563 warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
);
566 if(dependencyArray
==NULL
){
567 dependencyArray
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
);
570 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
572 elem
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
574 array_add(dependencyArray
, elem
, status
);
576 if (U_FAILURE(*status
))
// The closing brace is consumed only after both resources were created,
// because tokenValue points into the lookahead buffer.
580 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
// parseString: parses a string resource body - a TOK_STRING token followed
// by a closing brace.
//   tag       - key for the new string resource.
//   startline - only used in the diagnostic printf.
//   comment   - attached to the created resource.
// The resource is created with string_open before the closing brace is
// consumed. Return statements and the failure branch body are not visible
// in this excerpt.
583 static struct SResource
*
584 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
586 struct UString
*tokenValue
;
587 struct SResource
*result
= NULL
;
589 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
591 return parseUCARules(tag, startline, status);
594 printf(" string %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
596 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
598 if (U_SUCCESS(*status
))
600 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601 doesn't survive expect either) */
603 result
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
// The closing brace is consumed only when the resource was created.
604 if(U_SUCCESS(*status
) && result
) {
605 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
607 if (U_FAILURE(*status
))
// parseAlias: parses an alias resource body - a TOK_STRING token followed by
// a closing brace - and creates the resource with alias_open.
//   tag       - key for the new alias resource.
//   startline - only used in the diagnostic printf.
//   comment   - attached to the created resource.
// Return statements and the failure branch body are not visible in this
// excerpt.
618 static struct SResource
*
619 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
621 struct UString
*tokenValue
;
622 struct SResource
*result
= NULL
;
624 expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
);
627 printf(" alias %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
630 if (U_SUCCESS(*status
))
632 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633 doesn't survive expect either) */
635 result
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
);
// Unlike parseString, no `result` check is visible before consuming the
// closing brace here.
637 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
639 if (U_FAILURE(*status
))
649 #if !UCONFIG_NO_COLLATION
// resLookup: linear search for the child of table resource `res` whose key
// equals `key`.
// Walks the list starting at fFirst and following fNext, and compares each
// child's key string (located at fRoot->fKeys + fKey) with uprv_strcmp.
// `res` is first compared against res_none(). What is returned in that case,
// on a match, and when nothing matches is not visible in this excerpt;
// callers in this file test the result against NULL.
// NOTE(review): res->u.fTable is accessed without a visible check that res
// is a table resource - confirm against callers.
653 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){
654 struct SResource
*current
= NULL
;
655 struct SResTable
*list
;
656 if (res
== res_none()) {
660 list
= &(res
->u
.fTable
);
662 current
= list
->fFirst
;
663 while (current
!= NULL
) {
664 if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) {
667 current
= current
->fNext
;
// GenrbImporter: implementation of icu::CollationRuleParser::Importer.
// getRules() parses the resource file for the requested locale and extracts
// the "Sequence" string of the requested collation type.
// The constructor only stores the two directory pointers; it does not copy
// the strings.
672 class GenrbImporter
: public icu::CollationRuleParser::Importer
{
674 GenrbImporter(const char *in
, const char *out
) : inputDir(in
), outputDir(out
) {}
675 virtual ~GenrbImporter();
676 virtual void getRules(
677 const char *localeID
, const char *collationType
,
678 UnicodeString
&rules
,
679 const char *&errorReason
, UErrorCode
&errorCode
);
// Directories as given to the constructor; getRules may repoint inputDir at
// a buffer derived from the file name when it is NULL.
682 const char *inputDir
;
683 const char *outputDir
;
// Empty out-of-line destructor.
686 GenrbImporter::~GenrbImporter() {}
// GenrbImporter::getRules: loads the collation rule string for
// (localeID, collationType) into `rules`.
// Steps visible in this excerpt:
//   1. Build "<localeID>.txt" in a heap buffer, testing each character of
//      the locale for '-' (the action taken on a match is not visible).
//   2. Build openFileName from inputDir and that file name.
//   3. Open the file with ucbuf_open and parse it with parse().
//   4. Look up collations / <collationType> / Sequence with resLookup and
//      copy the string into `rules`.
//   5. Free inputDirBuf and openFileName.
// errorReason is unnamed and unused. Many statements of this function are
// missing from this excerpt.
689 GenrbImporter::getRules(
690 const char *localeID
, const char *collationType
,
691 UnicodeString
&rules
,
692 const char *& /*errorReason*/, UErrorCode
&errorCode
) {
693 struct SRBRoot
*data
= NULL
;
694 UCHARBUF
*ucbuf
= NULL
;
695 int localeLength
= strlen(localeID
);
// localeLength + 5 leaves room for ".txt" and the terminating NUL.
// NOTE(review): no NULL check on filename, and no release of filename or
// ucbuf, is visible in this excerpt - confirm in the full source.
696 char* filename
= (char*)uprv_malloc(localeLength
+5);
697 char *inputDirBuf
= NULL
;
698 char *openFileName
= NULL
;
703 struct SResource
* root
;
704 struct SResource
* collations
;
705 struct SResource
* collation
;
706 struct SResource
* sequence
;
708 memcpy(filename
, localeID
, localeLength
);
709 for(i
= 0; i
< localeLength
; i
++){
710 if(filename
[i
] == '-'){
714 filename
[localeLength
] = '.';
715 filename
[localeLength
+1] = 't';
716 filename
[localeLength
+2] = 'x';
717 filename
[localeLength
+3] = 't';
718 filename
[localeLength
+4] = 0;
721 if (U_FAILURE(errorCode
)) {
725 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
728 filelen
= (int32_t)uprv_strlen(filename
);
// Case 1: no input directory was given; derive it from the file name.
730 if(inputDir
== NULL
) {
731 const char *filenameBegin
= uprv_strrchr(filename
, U_FILE_SEP_CHAR
);
// NOTE(review): dirlen is used here before the assignment visible further
// down in this branch; its declaration and initial value are not visible in
// this excerpt. The malloc result is also written to on the next statement
// without a NULL check.
732 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
733 openFileName
[0] = '\0';
734 if (filenameBegin
!= NULL
) {
736 * When a filename ../../../data/root.txt is specified,
737 * we presume that the input directory is ../../../data
738 * This is very important when the resource file includes
739 * another file, like UCARules.txt or thaidict.brk.
741 int32_t filenameSize
= (int32_t)(filenameBegin
- filename
+ 1);
742 inputDirBuf
= (char *)uprv_malloc(filenameSize
);
745 if(inputDirBuf
== NULL
) {
746 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
// Copies the directory part and overwrites the trailing separator with NUL.
750 uprv_strncpy(inputDirBuf
, filename
, filenameSize
);
751 inputDirBuf
[filenameSize
- 1] = 0;
752 inputDir
= inputDirBuf
;
753 dirlen
= (int32_t)uprv_strlen(inputDir
);
// Case 2: an input directory was given.
756 dirlen
= (int32_t)uprv_strlen(inputDir
);
// Directory does not end with a separator: reserve one extra byte for it.
758 if(inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) {
759 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 2);
762 if(openFileName
== NULL
) {
763 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
767 openFileName
[0] = '\0';
769 * append the input dir to openFileName if the first char in
770 * filename is not file seperation char and the last char input directory is not '.'.
771 * This is to support :
772 * genrb -s. /home/icu/data
774 * The user cannot mix notations like
775 * genrb -s. /icu/data --- the absolute path specified. -s redundant
777 * genrb -s. icu/data --- start from CWD and look in icu/data dir
779 if( (filename
[0] != U_FILE_SEP_CHAR
) && (inputDir
[dirlen
-1] !='.')){
780 uprv_strcpy(openFileName
, inputDir
);
781 openFileName
[dirlen
] = U_FILE_SEP_CHAR
;
783 openFileName
[dirlen
+ 1] = '\0';
// Directory already ends with a separator: no extra byte needed.
785 openFileName
= (char *) uprv_malloc(dirlen
+ filelen
+ 1);
788 if(openFileName
== NULL
) {
789 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
793 uprv_strcpy(openFileName
, inputDir
);
797 uprv_strcat(openFileName
, filename
);
798 /* printf("%s\n", openFileName); */
// NOTE(review): errorCode is reset unconditionally before opening the file,
// discarding any error recorded above - confirm this is intended.
799 errorCode
= U_ZERO_ERROR
;
800 ucbuf
= ucbuf_open(openFileName
, &cp
,getShowWarning(),TRUE
, &errorCode
);
802 if(errorCode
== U_FILE_ACCESS_ERROR
) {
804 fprintf(stderr
, "couldn't open file %s\n", openFileName
== NULL
? filename
: openFileName
);
807 if (ucbuf
== NULL
|| U_FAILURE(errorCode
)) {
808 fprintf(stderr
, "An error occured processing file %s. Error: %s\n", openFileName
== NULL
? filename
: openFileName
,u_errorName(errorCode
));
812 /* Parse the data into an SRBRoot */
// The two FALSE arguments are passed positionally; their meaning is defined
// by parse(), which is not visible in this excerpt.
813 data
= parse(ucbuf
, inputDir
, outputDir
, filename
, FALSE
, FALSE
, &errorCode
);
814 if (U_FAILURE(errorCode
)) {
// The assignment of `root` is not visible in this excerpt.
819 collations
= resLookup(root
, "collations");
820 if (collations
!= NULL
) {
821 collation
= resLookup(collations
, collationType
);
822 if (collation
!= NULL
) {
823 sequence
= resLookup(collation
, "Sequence");
824 if (sequence
!= NULL
) {
825 // No string pointer aliasing so that we need not hold onto the resource bundle.
826 rules
.setTo(sequence
->u
.fString
.fChars
, sequence
->u
.fString
.fLength
);
832 if (inputDirBuf
!= NULL
) {
833 uprv_free(inputDirBuf
);
836 if (openFileName
!= NULL
) {
837 uprv_free(openFileName
);
845 // Quick-and-dirty escaping function.
846 // Assumes that we are on an ASCII-based platform.
// Converts the NUL-terminated UTF-16 string `s` into chars written to
// `buffer`. Code points are read with U16_NEXT; those in 0x20..0x7e are
// written as a single char, and a "\\u%04X" escape is written by sprintf in
// another branch. The first branch of the if-chain is not visible in this
// excerpt.
// NOTE(review): buffer has no size parameter and sprintf is unbounded, so
// the caller must supply a large enough buffer. addCollation passes
// char[100] arrays for the UParseError pre-context and post-context.
848 escape(const UChar
*s
, char *buffer
) {
849 int32_t length
= u_strlen(s
);
853 U16_NEXT(s
, i
, length
, c
);
857 } else if (0x20 <= c
&& c
<= 0x7e) {
859 *buffer
++ = (char)c
; // assumes ASCII-based platform
861 buffer
+= sprintf(buffer
, "\\u%04X", (int)c
);
868 #endif // !UCONFIG_NO_COLLATION
// addCollation: parses the members of one collation table into `result`
// and, when a "Sequence" rule string is present, builds the binary
// tailoring and adds it as "%%CollationBin".
//   result        - table that receives the parsed members (callers may
//                   pass NULL, see parseCollationElements).
//   collationType - used in messages, to skip "private-" types and to
//                   disable fast Latin for types starting with "search".
//   startline     - line reported for an unterminated table.
// Member handling, as far as visible here:
//   "Version"        - converted with u_versionFromString, and added.
//   "%%CollationBin" - an existing one in the input is discarded.
//   "Sequence"       - copied into `rules`; added to the table only when
//                      state->omitCollationRules is not set.
//   anything else    - added unchanged.
// Many statements of this function are missing from this excerpt.
870 static struct SResource
*
871 addCollation(ParseState
* state
, struct SResource
*result
, const char *collationType
,
872 uint32_t startline
, UErrorCode
*status
)
874 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
875 struct SResource
*member
= NULL
;
876 struct UString
*tokenValue
;
877 struct UString comment
;
878 enum ETokenType token
;
881 UBool haveRules
= FALSE
;
882 UVersionInfo version
;
885 /* '{' . (name resource)* '}' */
// Default version 0.0.0.0, used when no "Version" member is present.
886 version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0;
891 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
893 if (token
== TOK_CLOSE_BRACE
)
898 if (token
!= TOK_STRING
)
901 *status
= U_INVALID_FORMAT_ERROR
;
903 if (token
== TOK_EOF
)
905 error(startline
, "unterminated table");
909 error(line
, "Unexpected token %s", tokenNames
[token
]);
// NOTE(review): the key is converted into subtag, including the terminator,
// without a visible length check; the declaration of subtag is not visible
// in this excerpt.
915 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
917 if (U_FAILURE(*status
))
923 member
= parseResource(state
, subtag
, NULL
, status
);
925 if (U_FAILURE(*status
))
932 // Ignore the parsed resources, continue parsing.
934 else if (uprv_strcmp(subtag
, "Version") == 0)
937 int32_t length
= member
->u
.fString
.fLength
;
// Clamp so that the string plus terminator fits into ver.
939 if (length
>= (int32_t) sizeof(ver
))
941 length
= (int32_t) sizeof(ver
) - 1;
944 u_UCharsToChars(member
->u
.fString
.fChars
, ver
, length
+ 1); /* +1 for copying NULL */
945 u_versionFromString(version
, ver
);
947 table_add(result
, member
, line
, status
);
950 else if(uprv_strcmp(subtag
, "%%CollationBin")==0)
952 /* discard duplicate %%CollationBin if any*/
954 else if (uprv_strcmp(subtag
, "Sequence") == 0)
956 rules
.setTo(member
->u
.fString
.fChars
, member
->u
.fString
.fLength
);
958 // Defer building the collator until we have seen
959 // all sub-elements of the collation table, including the Version.
960 /* in order to achieve smaller data files, we can direct genrb */
961 /* to omit collation rules */
962 if(!state
->omitCollationRules
) {
963 table_add(result
, member
, line
, status
);
967 else // Just copy non-special items.
969 table_add(result
, member
, line
, status
);
// The condition under which member is closed is not visible in this
// excerpt.
972 res_close(member
); // TODO: use LocalPointer
973 if (U_FAILURE(*status
))
// Without a rule string there is nothing to build.
980 if (!haveRules
) { return result
; }
982 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
983 warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
986 // CLDR ticket #3949, ICU ticket #8082:
987 // Do not build collation binary data for for-import-only "private" collation rule strings.
988 if (uprv_strncmp(collationType
, "private-", 8) == 0) {
990 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
995 if(!state
->makeBinaryCollation
) {
997 printf("Not building %s~%s collation binary\n", state
->filename
, collationType
);
// The builder works on a local status; it is copied into *status in the
// failure path below.
1001 UErrorCode intStatus
= U_ZERO_ERROR
;
1002 UParseError parseError
;
1003 uprv_memset(&parseError
, 0, sizeof(parseError
));
// The importer resolves rule imports against the same input and output
// directories as this parse.
1004 GenrbImporter
importer(state
->inputdir
, state
->outputdir
);
1005 const icu::CollationTailoring
*base
= icu::CollationRoot::getRoot(intStatus
);
1006 if(U_FAILURE(intStatus
)) {
1007 error(line
, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus
));
1009 return NULL
; // TODO: use LocalUResourceBundlePointer for result
1011 icu::CollationBuilder
builder(base
, intStatus
);
// Prefix match on the first 6 characters: any type starting with "search".
1012 if(uprv_strncmp(collationType
, "search", 6) == 0) {
1013 builder
.disableFastLatin(); // build fast-Latin table unless search collator
1015 LocalPointer
<icu::CollationTailoring
> t(
1016 builder
.parseAndBuild(rules
, version
, &importer
, &parseError
, intStatus
));
1017 if(U_FAILURE(intStatus
)) {
1018 const char *reason
= builder
.getErrorReason();
1019 if(reason
== NULL
) { reason
= ""; }
1020 error(line
, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
1021 state
->filename
, collationType
,
1022 (long)parseError
.offset
, u_errorName(intStatus
), reason
);
1023 if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) {
1024 // Print pre- and post-context.
1025 char preBuffer
[100], postBuffer
[100];
1026 escape(parseError
.preContext
, preBuffer
);
1027 escape(parseError
.postContext
, postBuffer
);
1028 error(line
, " error context: \"...%s\" ! \"%s...\"", preBuffer
, postBuffer
);
1031 *status
= intStatus
;
// Serialize the tailoring: try a 100000-byte buffer first; on
// U_BUFFER_OVERFLOW_ERROR retry once with the size writeTailoring reported.
1036 icu::LocalMemory
<uint8_t> buffer
;
1037 int32_t capacity
= 100000;
1038 uint8_t *dest
= buffer
.allocateInsteadAndCopy(capacity
);
1040 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
1042 *status
= U_MEMORY_ALLOCATION_ERROR
;
1046 int32_t indexes
[icu::CollationDataReader::IX_TOTAL_SIZE
+ 1];
1047 int32_t totalSize
= icu::CollationDataWriter::writeTailoring(
1048 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
1049 if(intStatus
== U_BUFFER_OVERFLOW_ERROR
) {
1050 intStatus
= U_ZERO_ERROR
;
1051 capacity
= totalSize
;
1052 dest
= buffer
.allocateInsteadAndCopy(capacity
);
1054 fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n",
1056 *status
= U_MEMORY_ALLOCATION_ERROR
;
1060 totalSize
= icu::CollationDataWriter::writeTailoring(
1061 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
);
1063 if(U_FAILURE(intStatus
)) {
1064 fprintf(stderr
, "CollationDataWriter::writeTailoring() failed: %s\n",
1065 u_errorName(intStatus
));
1070 printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
);
1071 icu::CollationInfo::printSizes(totalSize
, indexes
);
1072 if(t
->settings
->hasReordering()) {
1073 printf("%s~%s collation reordering ranges:\n", state
->filename
, collationType
);
1074 icu::CollationInfo::printReorderRanges(
1075 *t
->data
, t
->settings
->reorderCodes
, t
->settings
->reorderCodesLength
);
// Wrap the serialized bytes in a binary resource and attach it to the table.
1078 struct SResource
*collationBin
= bin_open(state
->bundle
, "%%CollationBin", totalSize
, dest
, NULL
, NULL
, status
);
1079 table_add(result
, collationBin
, line
, status
);
1080 if (U_FAILURE(*status
)) {
// keepCollationType: predicate consulted by parseCollationElements before
// it opens a table for a collation type. The type argument is unnamed and
// therefore unused; the return type and body are not visible in this
// excerpt.
1089 keepCollationType(const char * /*type*/) {
// parseCollationElements: parses a "CollationElements" or "collations"
// table.
//   tag          - key of the table being parsed.
//   startline    - line of the table start, used in diagnostics.
//   newCollation - parseTable passes FALSE for "CollationElements" and TRUE
//                  for "collations"; the test on it is not visible here.
// Two paths are visible: a direct call to addCollation with the type
// "(no type)", and a loop over sub-entries. In the loop, "default" is parsed
// as a plain resource; a key followed by '{' is parsed by addCollation; a
// key followed by ':' is accepted only when the type keyword is "alias".
// Other forms set U_INVALID_FORMAT_ERROR.
// Many statements of this function are missing from this excerpt.
1093 static struct SResource
*
1094 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode
*status
)
1096 struct SResource
*result
= NULL
;
1097 struct SResource
*member
= NULL
;
1098 struct SResource
*collationRes
= NULL
;
1099 struct UString
*tokenValue
;
1100 struct UString comment
;
1101 enum ETokenType token
;
1102 char subtag
[1024], typeKeyword
[1024];
1105 result
= table_open(state
->bundle
, tag
, NULL
, status
);
1107 if (result
== NULL
|| U_FAILURE(*status
))
1112 printf(" collation elements %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1115 return addCollation(state
, result
, "(no type)", startline
, status
);
1119 ustr_init(&comment
);
1120 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1122 if (token
== TOK_CLOSE_BRACE
)
1127 if (token
!= TOK_STRING
)
1130 *status
= U_INVALID_FORMAT_ERROR
;
1132 if (token
== TOK_EOF
)
1134 error(startline
, "unterminated table");
1138 error(line
, "Unexpected token %s", tokenNames
[token
]);
// NOTE(review): subtag is char[1024]; the key plus terminator is converted
// into it without a visible length check.
1144 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1146 if (U_FAILURE(*status
))
1152 if (uprv_strcmp(subtag
, "default") == 0)
1154 member
= parseResource(state
, subtag
, NULL
, status
);
1156 if (U_FAILURE(*status
))
1162 table_add(result
, member
, line
, status
);
// Look at the token after the key without consuming it.
1166 token
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
);
1167 /* this probably needs to be refactored or recursively use the parser */
1168 /* first we assume that our collation table won't have the explicit type */
1169 /* then, we cannot handle aliases */
1170 if(token
== TOK_OPEN_BRACE
) {
1171 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1172 if (keepCollationType(subtag
)) {
1173 collationRes
= table_open(state
->bundle
, subtag
, NULL
, status
);
1175 collationRes
= NULL
;
1177 // need to parse the collation data regardless
1178 collationRes
= addCollation(state
, collationRes
, subtag
, startline
, status
);
1179 if (collationRes
!= NULL
) {
1180 table_add(result
, collationRes
, startline
, status
);
1182 } else if(token
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */
1183 /* we could have a table too */
// Peek one token past the colon to read the type keyword.
// NOTE(review): typeKeyword is char[1024]; no length check is visible.
1184 token
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
);
1185 u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1);
1186 if(uprv_strcmp(typeKeyword
, "alias") == 0) {
1187 member
= parseResource(state
, subtag
, NULL
, status
);
1188 if (U_FAILURE(*status
))
1194 table_add(result
, member
, line
, status
);
1197 *status
= U_INVALID_FORMAT_ERROR
;
1202 *status
= U_INVALID_FORMAT_ERROR
;
1207 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1209 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1211 if (U_FAILURE(*status
))
1220 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1221 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
// realParseTable: parses the members of a table into an already opened
// `table` resource.
//   tag       - only used in the diagnostic printf.
//   startline - reported for empty and unterminated tables.
// For each entry a key token is read, checked for invariant characters,
// converted into subtag, and the value is parsed with parseResource and
// added with table_add. The loop construct, the return statements and the
// declaration of subtag are not visible in this excerpt.
1222 static struct SResource
*
1223 realParseTable(ParseState
* state
, struct SResource
*table
, char *tag
, uint32_t startline
, UErrorCode
*status
)
1225 struct SResource
*member
= NULL
;
1226 struct UString
*tokenValue
=NULL
;
1227 struct UString comment
;
1228 enum ETokenType token
;
1231 UBool readToken
= FALSE
;
1233 /* '{' . (name resource)* '}' */
1236 printf(" parsing table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1240 ustr_init(&comment
);
1241 token
= getToken(state
, &tokenValue
, &comment
, &line
, status
);
1243 if (token
== TOK_CLOSE_BRACE
)
// The condition guarding this warning is not visible in this excerpt.
1246 warning(startline
, "Encountered empty table");
1251 if (token
!= TOK_STRING
)
1253 *status
= U_INVALID_FORMAT_ERROR
;
1255 if (token
== TOK_EOF
)
1257 error(startline
, "unterminated table");
1261 error(line
, "unexpected token %s", tokenNames
[token
]);
// Length -1 is passed here; parseUCARules-style explicit lengths are not
// used for the key check.
1267 if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) {
// NOTE(review): no length check against the size of subtag is visible.
1268 u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1);
1270 *status
= U_INVALID_FORMAT_ERROR
;
1271 error(line
, "invariant characters required for table keys");
1275 if (U_FAILURE(*status
))
1277 error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
));
// The comment collected with the key is handed to the member resource.
1281 member
= parseResource(state
, subtag
, &comment
, status
);
1283 if (member
== NULL
|| U_FAILURE(*status
))
1285 error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
));
1289 table_add(table
, member
, line
, status
);
1291 if (U_FAILURE(*status
))
1293 error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
));
1297 ustr_deinit(&comment
);
1301 /* A compiler warning will appear if all paths don't contain a return statement. */
1302 /* *status = U_INTERNAL_PROGRAM_ERROR;
// parseTable: entry point for table resources.
// Tables tagged "CollationElements" or "collations" are routed to
// parseCollationElements (with newCollation FALSE and TRUE respectively).
// Every other table is opened with table_open and filled by realParseTable.
//   tag       - key of the table; may be NULL.
//   startline - forwarded to the delegate and used in the diagnostic printf.
//   comment   - attached to the table by table_open; not forwarded on the
//               collation paths.
// The body of the table_open failure check is not visible in this excerpt.
1306 static struct SResource
*
1307 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1309 struct SResource
*result
;
1311 if (tag
!= NULL
&& uprv_strcmp(tag
, "CollationElements") == 0)
1313 return parseCollationElements(state
, tag
, startline
, FALSE
, status
);
1315 if (tag
!= NULL
&& uprv_strcmp(tag
, "collations") == 0)
1317 return parseCollationElements(state
, tag
, startline
, TRUE
, status
);
1320 printf(" table %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1323 result
= table_open(state
->bundle
, tag
, comment
, status
);
1325 if (result
== NULL
|| U_FAILURE(*status
))
1329 return realParseTable(state
, result
, tag
, startline
, status
);
// parseArray: parses an array resource.
//   tag       - key of the array; may be NULL.
//   startline - reported for empty and unterminated arrays.
//   comment   - attached to the array by array_open.
// Per element: the next token is peeked; a closing brace is consumed and
// ends the array; TOK_EOF sets U_INVALID_FORMAT_ERROR; a TOK_STRING becomes
// an unkeyed string resource directly; another visible path parses the
// element with parseResource. An optional comma after each element is
// consumed.
// The loop construct and return statements are not visible in this excerpt.
1332 static struct SResource
*
1333 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1335 struct SResource
*result
= NULL
;
1336 struct SResource
*member
= NULL
;
1337 struct UString
*tokenValue
;
1338 struct UString memberComments
;
1339 enum ETokenType token
;
1340 UBool readToken
= FALSE
;
1342 result
= array_open(state
->bundle
, tag
, comment
, status
);
1344 if (result
== NULL
|| U_FAILURE(*status
))
1349 printf(" array %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1352 ustr_init(&memberComments
);
1354 /* '{' . resource [','] '}' */
// Start each element with an empty comment buffer.
1358 ustr_setlen(&memberComments
, 0, status
);
1360 /* check for end of array, but don't consume next token unless it really is the end */
1361 token
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
);
1364 if (token
== TOK_CLOSE_BRACE
)
1366 getToken(state
, NULL
, NULL
, NULL
, status
);
// The condition guarding this warning is not visible in this excerpt.
1368 warning(startline
, "Encountered empty array");
1373 if (token
== TOK_EOF
)
1376 *status
= U_INVALID_FORMAT_ERROR
;
1377 error(startline
, "unterminated array");
1381 /* string arrays are a special case */
1382 if (token
== TOK_STRING
)
// Consume the string and build the element before any further getToken
// call, because tokenValue points into the lookahead buffer.
1384 getToken(state
, &tokenValue
, &memberComments
, NULL
, status
);
1385 member
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
);
1389 member
= parseResource(state
, NULL
, &memberComments
, status
);
1392 if (member
== NULL
|| U_FAILURE(*status
))
1398 array_add(result
, member
, status
);
1400 if (U_FAILURE(*status
))
1406 /* eat optional comma if present */
// NOTE(review): NULL is passed as the comment argument here, while the
// visible ustr_cpy call in peekToken has no visible NULL guard - confirm in
// the full source.
1407 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1409 if (token
== TOK_COMMA
)
1411 getToken(state
, NULL
, NULL
, NULL
, status
);
1414 if (U_FAILURE(*status
))
1422 ustr_deinit(&memberComments
);
1426 static struct SResource
*
1427 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1429 struct SResource
*result
= NULL
;
1430 enum ETokenType token
;
1433 UBool readToken
= FALSE
;
1436 struct UString memberComments
;
1438 result
= intvector_open(state
->bundle
, tag
, comment
, status
);
1440 if (result
== NULL
|| U_FAILURE(*status
))
1446 printf(" vector %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1448 ustr_init(&memberComments
);
1449 /* '{' . string [','] '}' */
1452 ustr_setlen(&memberComments
, 0, status
);
1454 /* check for end of array, but don't consume next token unless it really is the end */
1455 token
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
);
1457 if (token
== TOK_CLOSE_BRACE
)
1459 /* it's the end, consume the close brace */
1460 getToken(state
, NULL
, NULL
, NULL
, status
);
1462 warning(startline
, "Encountered empty int vector");
1464 ustr_deinit(&memberComments
);
1468 string
= getInvariantString(state
, NULL
, NULL
, status
);
1470 if (U_FAILURE(*status
))
1476 /* For handling illegal char in the Intvector */
1477 value
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1478 len
=(uint32_t)(stopstring
-string
);
1480 if(len
==uprv_strlen(string
))
1482 intvector_add(result
, value
, status
);
1484 token
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
);
1489 *status
=U_INVALID_CHAR_FOUND
;
1492 if (U_FAILURE(*status
))
1498 /* the comma is optional (even though it is required to prevent the reader from concatenating
1499 consecutive entries) so that a missing comma on the last entry isn't an error */
1500 if (token
== TOK_COMMA
)
1502 getToken(state
, NULL
, NULL
, NULL
, status
);
1508 /* A compiler warning will appear if all paths don't contain a return statement. */
1509 /* intvector_close(result, status);
1510 *status = U_INTERNAL_PROGRAM_ERROR;
1514 static struct SResource
*
1515 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1517 struct SResource
*result
= NULL
;
1520 char toConv
[3] = {'\0', '\0', '\0'};
1527 string
= getInvariantString(state
, &line
, NULL
, status
);
1529 if (string
== NULL
|| U_FAILURE(*status
))
1534 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1536 if (U_FAILURE(*status
))
1543 printf(" binary %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1546 count
= (uint32_t)uprv_strlen(string
);
1549 value
= static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count
));
1554 *status
= U_MEMORY_ALLOCATION_ERROR
;
1558 for (i
= 0; i
< count
; i
+= 2)
1560 toConv
[0] = string
[i
];
1561 toConv
[1] = string
[i
+ 1];
1563 value
[i
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16);
1564 len
=(uint32_t)(stopstring
-toConv
);
1566 if(len
!=uprv_strlen(toConv
))
1569 *status
=U_INVALID_CHAR_FOUND
;
1574 result
= bin_open(state
->bundle
, tag
, (i
>> 1), value
,NULL
, comment
, status
);
1580 *status
= U_INVALID_CHAR_FOUND
;
1582 error(line
, "Encountered invalid binary string");
1588 result
= bin_open(state
->bundle
, tag
, 0, NULL
, "",comment
,status
);
1589 warning(startline
, "Encountered empty binary tag");
1596 static struct SResource
*
1597 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
*comment
, UErrorCode
*status
)
1599 struct SResource
*result
= NULL
;
1605 string
= getInvariantString(state
, NULL
, NULL
, status
);
1607 if (string
== NULL
|| U_FAILURE(*status
))
1612 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1614 if (U_FAILURE(*status
))
1621 printf(" integer %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1624 if (uprv_strlen(string
) <= 0)
1626 warning(startline
, "Encountered empty integer. Default value is 0.");
1629 /* Allow integer support for hexdecimal, octal digit and decimal*/
1630 /* and handle illegal char in the integer*/
1631 value
= uprv_strtoul(string
, &stopstring
, 0);
1632 len
=(uint32_t)(stopstring
-string
);
1633 if(len
==uprv_strlen(string
))
1635 result
= int_open(state
->bundle
, tag
, value
, comment
, status
);
1639 *status
=U_INVALID_CHAR_FOUND
;
1646 static struct SResource
*
1647 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1649 struct SResource
*result
;
1655 char *fullname
= NULL
;
1656 filename
= getInvariantString(state
, &line
, NULL
, status
);
1658 if (U_FAILURE(*status
))
1663 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1665 if (U_FAILURE(*status
))
1667 uprv_free(filename
);
1672 printf(" import %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1675 /* Open the input file for reading */
1676 if (state
->inputdir
== NULL
)
1680 * Always save file file name, even if there's
1681 * no input directory specified. MIGHT BREAK SOMETHING
1683 int32_t filenameLength
= uprv_strlen(filename
);
1685 fullname
= (char *) uprv_malloc(filenameLength
+ 1);
1686 uprv_strcpy(fullname
, filename
);
1689 file
= T_FileStream_open(filename
, "rb");
1694 int32_t count
= (int32_t)uprv_strlen(filename
);
1696 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1698 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1701 if(fullname
== NULL
)
1703 *status
= U_MEMORY_ALLOCATION_ERROR
;
1707 uprv_strcpy(fullname
, state
->inputdir
);
1709 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1710 fullname
[state
->inputdirLength
+ 1] = '\0';
1712 uprv_strcat(fullname
, filename
);
1716 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 1);
1719 if(fullname
== NULL
)
1721 *status
= U_MEMORY_ALLOCATION_ERROR
;
1725 uprv_strcpy(fullname
, state
->inputdir
);
1726 uprv_strcat(fullname
, filename
);
1729 file
= T_FileStream_open(fullname
, "rb");
1735 error(line
, "couldn't open input file %s", filename
);
1736 *status
= U_FILE_ACCESS_ERROR
;
1740 len
= T_FileStream_size(file
);
1741 data
= (uint8_t*)uprv_malloc(len
* sizeof(uint8_t));
1745 *status
= U_MEMORY_ALLOCATION_ERROR
;
1746 T_FileStream_close (file
);
1750 /* int32_t numRead = */ T_FileStream_read (file
, data
, len
);
1751 T_FileStream_close (file
);
1753 result
= bin_open(state
->bundle
, tag
, len
, data
, fullname
, comment
, status
);
1756 uprv_free(filename
);
1757 uprv_free(fullname
);
1762 static struct SResource
*
1763 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode
*status
)
1765 struct SResource
*result
;
1769 UChar
*pTarget
= NULL
;
1772 char *fullname
= NULL
;
1774 const char* cp
= NULL
;
1775 const UChar
* uBuffer
= NULL
;
1777 filename
= getInvariantString(state
, &line
, NULL
, status
);
1778 count
= (int32_t)uprv_strlen(filename
);
1780 if (U_FAILURE(*status
))
1785 expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
);
1787 if (U_FAILURE(*status
))
1789 uprv_free(filename
);
1794 printf(" include %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1797 fullname
= (char *) uprv_malloc(state
->inputdirLength
+ count
+ 2);
1799 if(fullname
== NULL
)
1801 *status
= U_MEMORY_ALLOCATION_ERROR
;
1802 uprv_free(filename
);
1806 if(state
->inputdir
!=NULL
){
1807 if (state
->inputdir
[state
->inputdirLength
- 1] != U_FILE_SEP_CHAR
)
1810 uprv_strcpy(fullname
, state
->inputdir
);
1812 fullname
[state
->inputdirLength
] = U_FILE_SEP_CHAR
;
1813 fullname
[state
->inputdirLength
+ 1] = '\0';
1815 uprv_strcat(fullname
, filename
);
1819 uprv_strcpy(fullname
, state
->inputdir
);
1820 uprv_strcat(fullname
, filename
);
1823 uprv_strcpy(fullname
,filename
);
1826 ucbuf
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
);
1828 if (U_FAILURE(*status
)) {
1829 error(line
, "couldn't open input file %s\n", filename
);
1833 uBuffer
= ucbuf_getBuffer(ucbuf
,&len
,status
);
1834 result
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
);
1840 uprv_free(filename
);
1841 uprv_free(fullname
);
1850 U_STRING_DECL(k_type_string
, "string", 6);
1851 U_STRING_DECL(k_type_binary
, "binary", 6);
1852 U_STRING_DECL(k_type_bin
, "bin", 3);
1853 U_STRING_DECL(k_type_table
, "table", 5);
1854 U_STRING_DECL(k_type_table_no_fallback
, "table(nofallback)", 17);
1855 U_STRING_DECL(k_type_int
, "int", 3);
1856 U_STRING_DECL(k_type_integer
, "integer", 7);
1857 U_STRING_DECL(k_type_array
, "array", 5);
1858 U_STRING_DECL(k_type_alias
, "alias", 5);
1859 U_STRING_DECL(k_type_intvector
, "intvector", 9);
1860 U_STRING_DECL(k_type_import
, "import", 6);
1861 U_STRING_DECL(k_type_include
, "include", 7);
1863 /* Various non-standard processing plugins that create one or more special resources. */
1864 U_STRING_DECL(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1865 U_STRING_DECL(k_type_plugin_collation
, "process(collation)", 18);
1866 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)", 23);
1867 U_STRING_DECL(k_type_plugin_dependency
, "process(dependency)", 19);
/*
 * Resource types known to the parser. The enumerator values index
 * gResourceTypes[], so the order here must match the order of that table.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED
} EResourceType;
1890 const char *nameChars
; /* only used for debugging */
1891 const UChar
*nameUChars
;
1892 ParseResourceFunction
*parseFunction
;
1893 } gResourceTypes
[] = {
1894 {"Unknown", NULL
, NULL
},
1895 {"string", k_type_string
, parseString
},
1896 {"binary", k_type_binary
, parseBinary
},
1897 {"table", k_type_table
, parseTable
},
1898 {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */
1899 {"integer", k_type_integer
, parseInteger
},
1900 {"array", k_type_array
, parseArray
},
1901 {"alias", k_type_alias
, parseAlias
},
1902 {"intvector", k_type_intvector
, parseIntVector
},
1903 {"import", k_type_import
, parseImport
},
1904 {"include", k_type_include
, parseInclude
},
1905 {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
},
1906 {"process(collation)", k_type_plugin_collation
, NULL
/* not implemented yet */},
1907 {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
},
1908 {"process(dependency)", k_type_plugin_dependency
, parseDependency
},
1909 {"reserved", NULL
, NULL
}
1914 U_STRING_INIT(k_type_string
, "string", 6);
1915 U_STRING_INIT(k_type_binary
, "binary", 6);
1916 U_STRING_INIT(k_type_bin
, "bin", 3);
1917 U_STRING_INIT(k_type_table
, "table", 5);
1918 U_STRING_INIT(k_type_table_no_fallback
, "table(nofallback)", 17);
1919 U_STRING_INIT(k_type_int
, "int", 3);
1920 U_STRING_INIT(k_type_integer
, "integer", 7);
1921 U_STRING_INIT(k_type_array
, "array", 5);
1922 U_STRING_INIT(k_type_alias
, "alias", 5);
1923 U_STRING_INIT(k_type_intvector
, "intvector", 9);
1924 U_STRING_INIT(k_type_import
, "import", 6);
1925 U_STRING_INIT(k_type_include
, "include", 7);
1927 U_STRING_INIT(k_type_plugin_uca_rules
, "process(uca_rules)", 18);
1928 U_STRING_INIT(k_type_plugin_collation
, "process(collation)", 18);
1929 U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)", 23);
1930 U_STRING_INIT(k_type_plugin_dependency
, "process(dependency)", 19);
1933 static inline UBool
isTable(enum EResourceType type
) {
1934 return (UBool
)(type
==RESTYPE_TABLE
|| type
==RESTYPE_TABLE_NO_FALLBACK
);
1937 static enum EResourceType
1938 parseResourceType(ParseState
* state
, UErrorCode
*status
)
1940 struct UString
*tokenValue
;
1941 struct UString comment
;
1942 enum EResourceType result
= RESTYPE_UNKNOWN
;
1944 ustr_init(&comment
);
1945 expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
);
1947 if (U_FAILURE(*status
))
1949 return RESTYPE_UNKNOWN
;
1952 *status
= U_ZERO_ERROR
;
1954 /* Search for normal types */
1955 result
=RESTYPE_UNKNOWN
;
1956 while ((result
=(EResourceType
)(result
+1)) < RESTYPE_RESERVED
) {
1957 if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) {
1961 /* Now search for the aliases */
1962 if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) {
1963 result
= RESTYPE_INTEGER
;
1965 else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) {
1966 result
= RESTYPE_BINARY
;
1968 else if (result
== RESTYPE_RESERVED
) {
1969 char tokenBuffer
[1024];
1970 u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
));
1971 tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0;
1972 *status
= U_INVALID_FORMAT_ERROR
;
1973 error(line
, "unknown resource type '%s'", tokenBuffer
);
1979 /* parse a non-top-level resource */
1980 static struct SResource
*
1981 parseResource(ParseState
* state
, char *tag
, const struct UString
*comment
, UErrorCode
*status
)
1983 enum ETokenType token
;
1984 enum EResourceType resType
= RESTYPE_UNKNOWN
;
1985 ParseResourceFunction
*parseFunction
= NULL
;
1986 struct UString
*tokenValue
;
1991 token
= getToken(state
, &tokenValue
, NULL
, &startline
, status
);
1994 printf(" resource %s at line %i \n", (tag
== NULL
) ? "(null)" : tag
, (int)startline
);
1997 /* name . [ ':' type ] '{' resource '}' */
1998 /* This function parses from the colon onwards. If the colon is present, parse the
1999 type then try to parse a resource of that type. If there is no explicit type,
2000 work it out using the lookahead tokens. */
2004 *status
= U_INVALID_FORMAT_ERROR
;
2005 error(startline
, "Unexpected EOF encountered");
2009 *status
= U_INVALID_FORMAT_ERROR
;
2013 resType
= parseResourceType(state
, status
);
2014 expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
);
2016 if (U_FAILURE(*status
))
2023 case TOK_OPEN_BRACE
:
2027 *status
= U_INVALID_FORMAT_ERROR
;
2028 error(startline
, "syntax error while reading a resource, expected '{' or ':'");
2033 if (resType
== RESTYPE_UNKNOWN
)
2035 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
2036 We could have any of the following:
2037 { { => array (nested)
2039 { string , => string array
2043 { string :/{ => table
2044 { string } => string
2047 token
= peekToken(state
, 0, NULL
, &line
, NULL
,status
);
2049 if (U_FAILURE(*status
))
2054 if (token
== TOK_OPEN_BRACE
|| token
== TOK_COLON
||token
==TOK_CLOSE_BRACE
)
2056 resType
= RESTYPE_ARRAY
;
2058 else if (token
== TOK_STRING
)
2060 token
= peekToken(state
, 1, NULL
, &line
, NULL
, status
);
2062 if (U_FAILURE(*status
))
2069 case TOK_COMMA
: resType
= RESTYPE_ARRAY
; break;
2070 case TOK_OPEN_BRACE
: resType
= RESTYPE_TABLE
; break;
2071 case TOK_CLOSE_BRACE
: resType
= RESTYPE_STRING
; break;
2072 case TOK_COLON
: resType
= RESTYPE_TABLE
; break;
2074 *status
= U_INVALID_FORMAT_ERROR
;
2075 error(line
, "Unexpected token after string, expected ',', '{' or '}'");
2081 *status
= U_INVALID_FORMAT_ERROR
;
2082 error(line
, "Unexpected token after '{'");
2086 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2087 } else if(resType
== RESTYPE_TABLE_NO_FALLBACK
) {
2088 *status
= U_INVALID_FORMAT_ERROR
;
2089 error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
);
2094 /* We should now know what we need to parse next, so call the appropriate parser
2095 function and return. */
2096 parseFunction
= gResourceTypes
[resType
].parseFunction
;
2097 if (parseFunction
!= NULL
) {
2098 return parseFunction(state
, tag
, startline
, comment
, status
);
2101 *status
= U_INTERNAL_PROGRAM_ERROR
;
2102 error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
);
2108 /* parse the top-level resource */
2110 parse(UCHARBUF
*buf
, const char *inputDir
, const char *outputDir
, const char *filename
,
2111 UBool makeBinaryCollation
, UBool omitCollationRules
, UErrorCode
*status
)
2113 struct UString
*tokenValue
;
2114 struct UString comment
;
2116 enum EResourceType bundleType
;
2117 enum ETokenType token
;
2122 for (i
= 0; i
< MAX_LOOKAHEAD
+ 1; i
++)
2124 ustr_init(&state
.lookahead
[i
].value
);
2125 ustr_init(&state
.lookahead
[i
].comment
);
2128 initLookahead(&state
, buf
, status
);
2130 state
.inputdir
= inputDir
;
2131 state
.inputdirLength
= (state
.inputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0;
2132 state
.outputdir
= outputDir
;
2133 state
.outputdirLength
= (state
.outputdir
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0;
2134 state
.filename
= filename
;
2135 state
.makeBinaryCollation
= makeBinaryCollation
;
2136 state
.omitCollationRules
= omitCollationRules
;
2138 ustr_init(&comment
);
2139 expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
);
2141 state
.bundle
= bundle_open(&comment
, FALSE
, status
);
2143 if (state
.bundle
== NULL
|| U_FAILURE(*status
))
2149 bundle_setlocale(state
.bundle
, tokenValue
->fChars
, status
);
2151 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2152 token
= getToken(&state
, NULL
, NULL
, &line
, status
);
2153 if(token
==TOK_COLON
) {
2154 *status
=U_ZERO_ERROR
;
2155 bundleType
=parseResourceType(&state
, status
);
2157 if(isTable(bundleType
))
2159 expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
);
2163 *status
=U_PARSE_ERROR
;
2164 error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
));
2170 if(token
==TOK_OPEN_BRACE
)
2172 *status
=U_ZERO_ERROR
;
2173 bundleType
=RESTYPE_TABLE
;
2177 /* neither colon nor open brace */
2178 *status
=U_PARSE_ERROR
;
2179 bundleType
=RESTYPE_UNKNOWN
;
2180 error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
));
2184 if (U_FAILURE(*status
))
2186 bundle_close(state
.bundle
, status
);
2190 if(bundleType
==RESTYPE_TABLE_NO_FALLBACK
) {
2192 * Parse a top-level table with the table(nofallback) declaration.
2193 * This is the same as a regular table, but also sets the
2194 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2196 state
.bundle
->noFallback
=TRUE
;
2198 /* top-level tables need not handle special table names like "collations" */
2199 realParseTable(&state
, state
.bundle
->fRoot
, NULL
, line
, status
);
2200 if(dependencyArray
!=NULL
){
2201 table_add(state
.bundle
->fRoot
, dependencyArray
, 0, status
);
2202 dependencyArray
= NULL
;
2204 if (U_FAILURE(*status
))
2206 bundle_close(state
.bundle
, status
);
2207 res_close(dependencyArray
);
2211 if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
)
2213 warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)");
2215 *status
= U_INVALID_FORMAT_ERROR
;
2220 cleanupLookahead(&state
);
2221 ustr_deinit(&comment
);
2222 return state
.bundle
;