1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4 ******************************************************************************* 
   6 *   Copyright (C) 1998-2015, International Business Machines 
   7 *   Corporation and others.  All Rights Reserved. 
   9 ******************************************************************************* 
  13 * Modification History: 
  15 *   Date          Name          Description 
  16 *   05/26/99     stephen       Creation. 
  17 *   02/25/00     weiv          Overhaul to write udata 
  18 *   5/10/01      Ram           removed ustdio dependency 
  19 *   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten 
  20 ******************************************************************************* 
  23 // Safer use of UnicodeString. 
  24 #ifndef UNISTR_FROM_CHAR_EXPLICIT 
  25 #   define UNISTR_FROM_CHAR_EXPLICIT explicit 
  28 // Less important, but still a good idea. 
  29 #ifndef UNISTR_FROM_STRING_EXPLICIT 
  30 #   define UNISTR_FROM_STRING_EXPLICIT explicit 
  45 #include "unicode/stringpiece.h" 
  46 #include "unicode/unistr.h" 
  47 #include "unicode/ustring.h" 
  48 #include "unicode/uscript.h" 
  49 #include "unicode/utf16.h" 
  50 #include "unicode/putil.h" 
  52 #include "collationbuilder.h" 
  53 #include "collationdata.h" 
  54 #include "collationdatareader.h" 
  55 #include "collationdatawriter.h" 
  56 #include "collationfastlatinbuilder.h" 
  57 #include "collationinfo.h" 
  58 #include "collationroot.h" 
  59 #include "collationruleparser.h" 
  60 #include "collationtailoring.h" 
  63 /* Number of tokens to read ahead of the current stream position */ 
  64 #define MAX_LOOKAHEAD   3 
  74 #define STARTCOMMAND     0x005B 
  75 #define ENDCOMMAND       0x005D 
  76 #define OPENSQBRACKET    0x005B 
  77 #define CLOSESQBRACKET   0x005D 
  79 using icu::CharString
; 
  80 using icu::LocalMemory
; 
  81 using icu::LocalPointer
; 
  82 using icu::LocalUCHARBUFPointer
; 
  83 using icu::StringPiece
; 
  84 using icu::UnicodeString
; 
  90      struct UString    comment
; 
  94 /* keep in sync with token defines in read.h */ 
  95 const char *tokenNames
[TOK_TOKEN_COUNT
] = 
  97      "string",             /* A string token, such as "MonthNames" */ 
  98      "'{'",                 /* An opening brace character */ 
  99      "'}'",                 /* A closing brace character */ 
 103      "<end of file>",     /* End of the file has been reached successfully */ 
 107 /* Just to store "TRUE" */ 
 108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; 
 111     struct Lookahead  lookahead
[MAX_LOOKAHEAD 
+ 1]; 
 112     uint32_t          lookaheadPosition
; 
 114     struct SRBRoot 
*bundle
; 
 115     const char     *inputdir
; 
 116     uint32_t        inputdirLength
; 
 117     const char     *outputdir
; 
 118     uint32_t        outputdirLength
; 
 119     const char     *filename
; 
 120     UBool           makeBinaryCollation
; 
 121     UBool           omitCollationRules
; 
 124 typedef struct SResource 
* 
 125 ParseResourceFunction(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode 
*status
); 
 127 static struct SResource 
*parseResource(ParseState
* state
, char *tag
, const struct UString 
*comment
, UErrorCode 
*status
); 
 129 /* The nature of the lookahead buffer: 
 130    There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides 
 131    MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 
 132    When getToken is called, the current pointer is moved to the next slot and the 
 133    old slot is filled with the next token from the reader by calling getNextToken. 
 134    The token values are stored in the slot, which means that token values don't 
 135    survive a call to getToken, i.e.
 139    getToken(&value, NULL, status); 
 140    getToken(NULL,   NULL, status);       bad - value is now a different string 
 143 initLookahead(ParseState
* state
, UCHARBUF 
*buf
, UErrorCode 
*status
) 
 145     static uint32_t initTypeStrings 
= 0; 
 148     if (!initTypeStrings
) 
 153     state
->lookaheadPosition   
= 0; 
 158     for (i 
= 0; i 
< MAX_LOOKAHEAD
; i
++) 
 160         state
->lookahead
[i
].type 
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
); 
 161         if (U_FAILURE(*status
)) 
 167     *status 
= U_ZERO_ERROR
; 
 171 cleanupLookahead(ParseState
* state
) 
 174     for (i 
= 0; i 
<= MAX_LOOKAHEAD
; i
++) 
 176         ustr_deinit(&state
->lookahead
[i
].value
); 
 177         ustr_deinit(&state
->lookahead
[i
].comment
); 
 182 static enum ETokenType
 
 183 getToken(ParseState
* state
, struct UString 
**tokenValue
, struct UString
* comment
, uint32_t *linenumber
, UErrorCode 
*status
) 
 185     enum ETokenType result
; 
 188     result 
= state
->lookahead
[state
->lookaheadPosition
].type
; 
 190     if (tokenValue 
!= NULL
) 
 192         *tokenValue 
= &state
->lookahead
[state
->lookaheadPosition
].value
; 
 195     if (linenumber 
!= NULL
) 
 197         *linenumber 
= state
->lookahead
[state
->lookaheadPosition
].line
; 
 202         ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
); 
 205     i 
= (state
->lookaheadPosition 
+ MAX_LOOKAHEAD
) % (MAX_LOOKAHEAD 
+ 1); 
 206     state
->lookaheadPosition 
= (state
->lookaheadPosition 
+ 1) % (MAX_LOOKAHEAD 
+ 1); 
 207     ustr_setlen(&state
->lookahead
[i
].comment
, 0, status
); 
 208     ustr_setlen(&state
->lookahead
[i
].value
, 0, status
); 
 209     state
->lookahead
[i
].type 
= getNextToken(state
->buffer
, &state
->lookahead
[i
].value
, &state
->lookahead
[i
].line
, &state
->lookahead
[i
].comment
, status
); 
 211     /* printf("getToken, returning %s\n", tokenNames[result]); */ 
 216 static enum ETokenType
 
 217 peekToken(ParseState
* state
, uint32_t lookaheadCount
, struct UString 
**tokenValue
, uint32_t *linenumber
, struct UString 
*comment
, UErrorCode 
*status
) 
 219     uint32_t i 
= (state
->lookaheadPosition 
+ lookaheadCount
) % (MAX_LOOKAHEAD 
+ 1); 
 221     if (U_FAILURE(*status
)) 
 226     if (lookaheadCount 
>= MAX_LOOKAHEAD
) 
 228         *status 
= U_INTERNAL_PROGRAM_ERROR
; 
 232     if (tokenValue 
!= NULL
) 
 234         *tokenValue 
= &state
->lookahead
[i
].value
; 
 237     if (linenumber 
!= NULL
) 
 239         *linenumber 
= state
->lookahead
[i
].line
; 
 243         ustr_cpy(comment
, &(state
->lookahead
[state
->lookaheadPosition
].comment
), status
); 
 246     return state
->lookahead
[i
].type
; 
 250 expect(ParseState
* state
, enum ETokenType expectedToken
, struct UString 
**tokenValue
, struct UString 
*comment
, uint32_t *linenumber
, UErrorCode 
*status
) 
 254     enum ETokenType token 
= getToken(state
, tokenValue
, comment
, &line
, status
); 
 256     if (linenumber 
!= NULL
) 
 261     if (U_FAILURE(*status
)) 
 266     if (token 
!= expectedToken
) 
 268         *status 
= U_INVALID_FORMAT_ERROR
; 
 269         error(line
, "expecting %s, got %s", tokenNames
[expectedToken
], tokenNames
[token
]); 
 273         *status 
= U_ZERO_ERROR
; 
 277 static char *getInvariantString(ParseState
* state
, uint32_t *line
, struct UString 
*comment
, UErrorCode 
*status
) 
 279     struct UString 
*tokenValue
; 
 283     expect(state
, TOK_STRING
, &tokenValue
, comment
, line
, status
); 
 285     if (U_FAILURE(*status
)) 
 290     count 
= u_strlen(tokenValue
->fChars
); 
 291     if(!uprv_isInvariantUString(tokenValue
->fChars
, count
)) { 
 292         *status 
= U_INVALID_FORMAT_ERROR
; 
 293         error(*line
, "invariant characters required for table keys, binary data, etc."); 
 297     result 
= static_cast<char *>(uprv_malloc(count
+1)); 
 301         *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 305     u_UCharsToChars(tokenValue
->fChars
, result
, count
+1); 
 309 static struct SResource 
* 
 310 parseUCARules(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode 
*status
) 
 312     struct SResource 
*result 
= NULL
; 
 313     struct UString   
*tokenValue
; 
 314     FileStream       
*file          
= NULL
; 
 315     char              filename
[256] = { '\0' }; 
 316     char              cs
[128]       = { '\0' }; 
 318     UBool quoted 
= FALSE
; 
 319     UCHARBUF 
*ucbuf
=NULL
; 
 321     const char* cp  
= NULL
; 
 322     UChar 
*pTarget     
= NULL
; 
 323     UChar 
*target      
= NULL
; 
 324     UChar 
*targetLimit 
= NULL
; 
 327     expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
); 
 330         printf(" %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
 333     if (U_FAILURE(*status
)) 
 337     /* make the filename including the directory */ 
 338     if (state
->inputdir 
!= NULL
) 
 340         uprv_strcat(filename
, state
->inputdir
); 
 342         if (state
->inputdir
[state
->inputdirLength 
- 1] != U_FILE_SEP_CHAR
) 
 344             uprv_strcat(filename
, U_FILE_SEP_STRING
); 
 348     u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
); 
 350     expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
 352     if (U_FAILURE(*status
)) 
 356     uprv_strcat(filename
, cs
); 
 358     if(state
->omitCollationRules
) { 
 362     ucbuf 
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
); 
 364     if (U_FAILURE(*status
)) { 
 365         error(line
, "An error occurred while opening the input file %s\n", filename
); 
 369     /* We allocate more space than actually required 
 370     * since the actual size needed for storing UChars 
 371     * is not known in UTF-8 byte stream 
 373     size        
= ucbuf_size(ucbuf
) + 1; 
 374     pTarget     
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR 
* size
); 
 375     uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
); 
 377     targetLimit 
= pTarget
+size
; 
 379     /* read the rules into the buffer */ 
 380     while (target 
< targetLimit
) 
 382         c 
= ucbuf_getc(ucbuf
, status
); 
 384             quoted 
= (UBool
)!quoted
; 
 386         /* weiv (06/26/2002): adding the following: 
 387          * - preserving spaces in commands [...] 
 388          * - # comments until the end of line 
 390         if (c 
== STARTCOMMAND 
&& !quoted
) 
 393              * closing bracket will be handled by the 
 394              * append at the end of the loop 
 396             while(c 
!= ENDCOMMAND
) { 
 397                 U_APPEND_CHAR32_ONLY(c
, target
); 
 398                 c 
= ucbuf_getc(ucbuf
, status
); 
 401         else if (c 
== HASH 
&& !quoted
) { 
 403             while(c 
!= CR 
&& c 
!= LF
) { 
 404                 c 
= ucbuf_getc(ucbuf
, status
); 
 408         else if (c 
== ESCAPE
) 
 410             c 
= unescape(ucbuf
, status
); 
 412             if (c 
== (UChar32
)U_ERR
) 
 415                 T_FileStream_close(file
); 
 419         else if (!quoted 
&& (c 
== SPACE 
|| c 
== TAB 
|| c 
== CR 
|| c 
== LF
)) 
 421             /* ignore spaces carriage returns 
 422             * and line feed unless in the form \uXXXX 
 427         /* Append UChar * after dissembling if c > 0xffff*/ 
 428         if (c 
!= (UChar32
)U_EOF
) 
 430             U_APPEND_CHAR32_ONLY(c
, target
); 
 438     /* terminate the string */ 
 439     if(target 
< targetLimit
){ 
 443     result 
= string_open(state
->bundle
, tag
, pTarget
, (int32_t)(target 
- pTarget
), NULL
, status
); 
 448     T_FileStream_close(file
); 
 453 static struct SResource 
* 
 454 parseTransliterator(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* /*comment*/, UErrorCode 
*status
) 
 456     struct SResource 
*result 
= NULL
; 
 457     struct UString   
*tokenValue
; 
 458     FileStream       
*file          
= NULL
; 
 459     char              filename
[256] = { '\0' }; 
 460     char              cs
[128]       = { '\0' }; 
 462     UCHARBUF 
*ucbuf
=NULL
; 
 463     const char* cp  
= NULL
; 
 464     UChar 
*pTarget     
= NULL
; 
 465     const UChar 
*pSource     
= NULL
; 
 468     expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
); 
 471         printf(" %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
 474     if (U_FAILURE(*status
)) 
 478     /* make the filename including the directory */ 
 479     if (state
->inputdir 
!= NULL
) 
 481         uprv_strcat(filename
, state
->inputdir
); 
 483         if (state
->inputdir
[state
->inputdirLength 
- 1] != U_FILE_SEP_CHAR
) 
 485             uprv_strcat(filename
, U_FILE_SEP_STRING
); 
 489     u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
); 
 491     expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
 493     if (U_FAILURE(*status
)) 
 497     uprv_strcat(filename
, cs
); 
 500     ucbuf 
= ucbuf_open(filename
, &cp
, getShowWarning(),FALSE
, status
); 
 502     if (U_FAILURE(*status
)) { 
 503         error(line
, "An error occurred while opening the input file %s\n", filename
); 
 507     /* We allocate more space than actually required 
 508     * since the actual size needed for storing UChars 
 509     * is not known in UTF-8 byte stream 
 511     pSource 
= ucbuf_getBuffer(ucbuf
, &size
, status
); 
 512     pTarget     
= (UChar
*) uprv_malloc(U_SIZEOF_UCHAR 
* (size 
+ 1)); 
 513     uprv_memset(pTarget
, 0, size
*U_SIZEOF_UCHAR
); 
 515 #if !UCONFIG_NO_TRANSLITERATION 
 516     size 
= utrans_stripRules(pSource
, size
, pTarget
, status
); 
 519     fprintf(stderr
, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); 
 521     result 
= string_open(state
->bundle
, tag
, pTarget
, size
, NULL
, status
); 
 525     T_FileStream_close(file
); 
 529 static ArrayResource
* dependencyArray 
= NULL
; 
 531 static struct SResource 
* 
 532 parseDependency(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode 
*status
) 
 534     struct SResource 
*result 
= NULL
; 
 535     struct SResource 
*elem 
= NULL
; 
 536     struct UString   
*tokenValue
; 
 538     char              filename
[256] = { '\0' }; 
 539     char              cs
[128]       = { '\0' }; 
 541     expect(state
, TOK_STRING
, &tokenValue
, NULL
, &line
, status
); 
 544         printf(" %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
 547     if (U_FAILURE(*status
)) 
 551     /* make the filename including the directory */ 
 552     if (state
->outputdir 
!= NULL
) 
 554         uprv_strcat(filename
, state
->outputdir
); 
 556         if (state
->outputdir
[state
->outputdirLength 
- 1] != U_FILE_SEP_CHAR
) 
 558             uprv_strcat(filename
, U_FILE_SEP_STRING
); 
 562     u_UCharsToChars(tokenValue
->fChars
, cs
, tokenValue
->fLength
); 
 564     if (U_FAILURE(*status
)) 
 568     uprv_strcat(filename
, cs
); 
 569     if(!T_FileStream_file_exists(filename
)){ 
 571             error(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
); 
 573             warning(line
, "The dependency file %s does not exist. Please make sure it exists.\n",filename
); 
 576     if(dependencyArray
==NULL
){ 
 577         dependencyArray 
= array_open(state
->bundle
, "%%DEPENDENCY", NULL
, status
); 
 580         result 
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
); 
 582     elem 
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
); 
 584     dependencyArray
->add(elem
); 
 586     if (U_FAILURE(*status
)) 
 590     expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
 593 static struct SResource 
* 
 594 parseString(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode 
*status
) 
 596     struct UString   
*tokenValue
; 
 597     struct SResource 
*result 
= NULL
; 
 599 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) 
 601         return parseUCARules(tag, startline, status); 
 604         printf(" string %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
 606     expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
); 
 608     if (U_SUCCESS(*status
)) 
 610         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 
 611         doesn't survive expect either) */ 
 613         result 
= string_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
); 
 614         if(U_SUCCESS(*status
) && result
) { 
 615             expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
 617             if (U_FAILURE(*status
)) 
 628 static struct SResource 
* 
 629 parseAlias(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString 
*comment
, UErrorCode 
*status
) 
 631     struct UString   
*tokenValue
; 
 632     struct SResource 
*result  
= NULL
; 
 634     expect(state
, TOK_STRING
, &tokenValue
, NULL
, NULL
, status
); 
 637         printf(" alias %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
 640     if (U_SUCCESS(*status
)) 
 642         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 
 643         doesn't survive expect either) */ 
 645         result 
= alias_open(state
->bundle
, tag
, tokenValue
->fChars
, tokenValue
->fLength
, comment
, status
); 
 647         expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
 649         if (U_FAILURE(*status
)) 
 659 #if !UCONFIG_NO_COLLATION 
 663 static struct SResource
* resLookup(struct SResource
* res
, const char* key
){ 
 664     if (res 
== res_none() || !res
->isTable()) { 
 668     TableResource 
*list 
= static_cast<TableResource 
*>(res
); 
 669     SResource 
*current 
= list
->fFirst
; 
 670     while (current 
!= NULL
) { 
 671         if (uprv_strcmp(((list
->fRoot
->fKeys
) + (current
->fKey
)), key
) == 0) { 
 674         current 
= current
->fNext
; 
 679 class GenrbImporter 
: public icu::CollationRuleParser::Importer 
{ 
 681     GenrbImporter(const char *in
, const char *out
) : inputDir(in
), outputDir(out
) {} 
 682     virtual ~GenrbImporter(); 
 683     virtual void getRules( 
 684             const char *localeID
, const char *collationType
, 
 685             UnicodeString 
&rules
, 
 686             const char *&errorReason
, UErrorCode 
&errorCode
); 
 689     const char *inputDir
; 
 690     const char *outputDir
; 
 693 GenrbImporter::~GenrbImporter() {} 
 696 GenrbImporter::getRules( 
 697         const char *localeID
, const char *collationType
, 
 698         UnicodeString 
&rules
, 
 699         const char *& /*errorReason*/, UErrorCode 
&errorCode
) { 
 700     CharString 
filename(localeID
, errorCode
); 
 701     for(int32_t i 
= 0; i 
< filename
.length(); i
++){ 
 702         if(filename
[i
] == '-'){ 
 703             filename
.data()[i
] = '_'; 
 706     filename
.append(".txt", errorCode
); 
 707     if (U_FAILURE(errorCode
)) { 
 710     CharString inputDirBuf
; 
 711     CharString openFileName
; 
 712     if(inputDir 
== NULL
) { 
 713         const char *filenameBegin 
= uprv_strrchr(filename
.data(), U_FILE_SEP_CHAR
); 
 714         if (filenameBegin 
!= NULL
) { 
 716              * When a filename ../../../data/root.txt is specified, 
 717              * we presume that the input directory is ../../../data 
 718              * This is very important when the resource file includes 
 719              * another file, like UCARules.txt or thaidict.brk. 
 721             StringPiece dir 
= filename
.toStringPiece(); 
 722             const char *filenameLimit 
= filename
.data() + filename
.length(); 
 723             dir
.remove_suffix((int32_t)(filenameLimit 
- filenameBegin
)); 
 724             inputDirBuf
.append(dir
, errorCode
); 
 725             inputDir 
= inputDirBuf
.data(); 
 728         int32_t dirlen  
= (int32_t)uprv_strlen(inputDir
); 
 730         if((filename
[0] != U_FILE_SEP_CHAR
) && (inputDir
[dirlen
-1] !='.')) { 
 732              * append the input dir to openFileName if the first char in
 733              * filename is not a file separator char and the last char of the input directory is not '.'.
 734              * This is to support : 
 735              * genrb -s. /home/icu/data 
 737              * The user cannot mix notations like 
 738              * genrb -s. /icu/data --- the absolute path specified. -s redundant 
 740              * genrb -s. icu/data  --- start from CWD and look in icu/data dir 
 742             openFileName
.append(inputDir
, dirlen
, errorCode
); 
 743             if(inputDir
[dirlen
-1] != U_FILE_SEP_CHAR
) { 
 744                 openFileName
.append(U_FILE_SEP_CHAR
, errorCode
); 
 748     openFileName
.append(filename
, errorCode
); 
 749     if(U_FAILURE(errorCode
)) { 
 752     // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data()); 
 754     LocalUCHARBUFPointer 
ucbuf( 
 755             ucbuf_open(openFileName
.data(), &cp
, getShowWarning(), TRUE
, &errorCode
)); 
 756     if(errorCode 
== U_FILE_ACCESS_ERROR
) { 
 757         fprintf(stderr
, "couldn't open file %s\n", openFileName
.data()); 
 760     if (ucbuf
.isNull() || U_FAILURE(errorCode
)) { 
 761         fprintf(stderr
, "An error occurred processing file %s. Error: %s\n", openFileName
.data(), u_errorName(errorCode
)); 
 765     /* Parse the data into an SRBRoot */ 
 766     LocalPointer
<SRBRoot
> data( 
 767             parse(ucbuf
.getAlias(), inputDir
, outputDir
, filename
.data(), FALSE
, FALSE
, &errorCode
)); 
 768     if (U_FAILURE(errorCode
)) { 
 772     struct SResource 
*root 
= data
->fRoot
; 
 773     struct SResource 
*collations 
= resLookup(root
, "collations"); 
 774     if (collations 
!= NULL
) { 
 775       struct SResource 
*collation 
= resLookup(collations
, collationType
); 
 776       if (collation 
!= NULL
) { 
 777         struct SResource 
*sequence 
= resLookup(collation
, "Sequence"); 
 778         if (sequence 
!= NULL 
&& sequence
->isString()) { 
 779           // No string pointer aliasing so that we need not hold onto the resource bundle. 
 780           StringResource 
*sr 
= static_cast<StringResource 
*>(sequence
); 
 787 // Quick-and-dirty escaping function. 
 788 // Assumes that we are on an ASCII-based platform. 
 790 escape(const UChar 
*s
, char *buffer
) { 
 791     int32_t length 
= u_strlen(s
); 
 795         U16_NEXT(s
, i
, length
, c
); 
 799         } else if (0x20 <= c 
&& c 
<= 0x7e) { 
 801             *buffer
++ = (char)c
;  // assumes ASCII-based platform 
 803             buffer 
+= sprintf(buffer
, "\\u%04X", (int)c
); 
 810 #endif  // !UCONFIG_NO_COLLATION 
 812 static TableResource 
* 
 813 addCollation(ParseState
* state
, TableResource  
*result
, const char *collationType
, 
 814              uint32_t startline
, UErrorCode 
*status
) 
 816     // TODO: Use LocalPointer for result, or make caller close it when there is a failure. 
 817     struct SResource  
*member 
= NULL
; 
 818     struct UString    
*tokenValue
; 
 819     struct UString     comment
; 
 820     enum   ETokenType  token
; 
 823     UBool              haveRules 
= FALSE
; 
 824     UVersionInfo       version
; 
 827     /* '{' . (name resource)* '}' */ 
 828     version
[0]=0; version
[1]=0; version
[2]=0; version
[3]=0; 
 833         token 
= getToken(state
, &tokenValue
, &comment
, &line
, status
); 
 835         if (token 
== TOK_CLOSE_BRACE
) 
 840         if (token 
!= TOK_STRING
) 
 843             *status 
= U_INVALID_FORMAT_ERROR
; 
 845             if (token 
== TOK_EOF
) 
 847                 error(startline
, "unterminated table"); 
 851                 error(line
, "Unexpected token %s", tokenNames
[token
]); 
 857         u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1); 
 859         if (U_FAILURE(*status
)) 
 865         member 
= parseResource(state
, subtag
, NULL
, status
); 
 867         if (U_FAILURE(*status
)) 
 874             // Ignore the parsed resources, continue parsing. 
 876         else if (uprv_strcmp(subtag
, "Version") == 0 && member
->isString()) 
 878             StringResource 
*sr 
= static_cast<StringResource 
*>(member
); 
 880             int32_t length 
= sr
->length(); 
 882             if (length 
>= UPRV_LENGTHOF(ver
)) 
 884                 length 
= UPRV_LENGTHOF(ver
) - 1; 
 887             sr
->fString
.extract(0, length
, ver
, UPRV_LENGTHOF(ver
), US_INV
); 
 888             u_versionFromString(version
, ver
); 
 890             result
->add(member
, line
, *status
); 
 893         else if(uprv_strcmp(subtag
, "%%CollationBin")==0) 
 895             /* discard duplicate %%CollationBin if any*/ 
 897         else if (uprv_strcmp(subtag
, "Sequence") == 0 && member
->isString()) 
 899             StringResource 
*sr 
= static_cast<StringResource 
*>(member
); 
 902             // Defer building the collator until we have seen 
 903             // all sub-elements of the collation table, including the Version. 
 904             /* in order to achieve smaller data files, we can direct genrb */ 
 905             /* to omit collation rules */ 
 906             if(!state
->omitCollationRules
) { 
 907                 result
->add(member
, line
, *status
); 
 911         else  // Just copy non-special items. 
 913             result
->add(member
, line
, *status
); 
 916         res_close(member
);  // TODO: use LocalPointer 
 917         if (U_FAILURE(*status
)) 
 924     if (!haveRules
) { return result
; } 
 926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO 
 927     warning(line
, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); 
 930     // CLDR ticket #3949, ICU ticket #8082: 
 931     // Do not build collation binary data for for-import-only "private" collation rule strings. 
 932     if (uprv_strncmp(collationType
, "private-", 8) == 0) { 
 934             printf("Not building %s~%s collation binary\n", state
->filename
, collationType
); 
 939     if(!state
->makeBinaryCollation
) { 
 941             printf("Not building %s~%s collation binary\n", state
->filename
, collationType
); 
 945     UErrorCode intStatus 
= U_ZERO_ERROR
; 
 946     UParseError parseError
; 
 947     uprv_memset(&parseError
, 0, sizeof(parseError
)); 
 948     GenrbImporter 
importer(state
->inputdir
, state
->outputdir
); 
 949     const icu::CollationTailoring 
*base 
= icu::CollationRoot::getRoot(intStatus
); 
 950     if(U_FAILURE(intStatus
)) { 
 951         error(line
, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus
)); 
 953         return NULL
;  // TODO: use LocalUResourceBundlePointer for result 
 955     icu::CollationBuilder 
builder(base
, intStatus
); 
 956     if(uprv_strncmp(collationType
, "search", 6) == 0) { 
 957         builder
.disableFastLatin();  // build fast-Latin table unless search collator 
 959     LocalPointer
<icu::CollationTailoring
> t( 
 960             builder
.parseAndBuild(rules
, version
, &importer
, &parseError
, intStatus
)); 
 961     if(U_FAILURE(intStatus
)) { 
 962         const char *reason 
= builder
.getErrorReason(); 
 963         if(reason 
== NULL
) { reason 
= ""; } 
 964         error(line
, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s", 
 965                 state
->filename
, collationType
, 
 966                 (long)parseError
.offset
, u_errorName(intStatus
), reason
); 
 967         if(parseError
.preContext
[0] != 0 || parseError
.postContext
[0] != 0) { 
 968             // Print pre- and post-context. 
 969             char preBuffer
[100], postBuffer
[100]; 
 970             escape(parseError
.preContext
, preBuffer
); 
 971             escape(parseError
.postContext
, postBuffer
); 
 972             error(line
, "  error context: \"...%s\" ! \"%s...\"", preBuffer
, postBuffer
); 
 974         if(isStrict() || t
.isNull()) { 
 980     icu::LocalMemory
<uint8_t> buffer
; 
 981     int32_t capacity 
= 100000; 
 982     uint8_t *dest 
= buffer
.allocateInsteadAndCopy(capacity
); 
 984         fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n", 
 986         *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 990     int32_t indexes
[icu::CollationDataReader::IX_TOTAL_SIZE 
+ 1]; 
 991     int32_t totalSize 
= icu::CollationDataWriter::writeTailoring( 
 992             *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
); 
 993     if(intStatus 
== U_BUFFER_OVERFLOW_ERROR
) { 
 994         intStatus 
= U_ZERO_ERROR
; 
 995         capacity 
= totalSize
; 
 996         dest 
= buffer
.allocateInsteadAndCopy(capacity
); 
 998             fprintf(stderr
, "memory allocation (%ld bytes) for file contents failed\n", 
1000             *status 
= U_MEMORY_ALLOCATION_ERROR
; 
1004         totalSize 
= icu::CollationDataWriter::writeTailoring( 
1005                 *t
, *t
->settings
, indexes
, dest
, capacity
, intStatus
); 
1007     if(U_FAILURE(intStatus
)) { 
1008         fprintf(stderr
, "CollationDataWriter::writeTailoring() failed: %s\n", 
1009                 u_errorName(intStatus
)); 
1014         printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
); 
1015         icu::CollationInfo::printSizes(totalSize
, indexes
); 
1016         if(t
->settings
->hasReordering()) { 
1017             printf("%s~%s collation reordering ranges:\n", state
->filename
, collationType
); 
1018             icu::CollationInfo::printReorderRanges( 
1019                     *t
->data
, t
->settings
->reorderCodes
, t
->settings
->reorderCodesLength
); 
1021 #if 0  // debugging output 
1023         printf("%s~%s collation tailoring part sizes:\n", state
->filename
, collationType
); 
1024         icu::CollationInfo::printSizes(totalSize
, indexes
); 
1027     struct SResource 
*collationBin 
= bin_open(state
->bundle
, "%%CollationBin", totalSize
, dest
, NULL
, NULL
, status
); 
1028     result
->add(collationBin
, line
, *status
); 
1029     if (U_FAILURE(*status
)) { 
1038 keepCollationType(const char * /*type*/) { 
// Parses a collation table. parseTable routes the tags CollationElements
// (newCollation FALSE) and collations (newCollation TRUE) to this function.
//
// Opens a table resource for tag, then reads key tokens until a closing brace:
//   - the key default is parsed as a generic resource and added to the table;
//   - any other key followed by an open brace is handed to addCollation, and
//     the returned resource is added to the table when it is not NULL;
//   - a key followed by a colon is accepted only when the type keyword that
//     follows is alias; otherwise status becomes U_INVALID_FORMAT_ERROR.
// One visible path returns addCollation directly with the literal type
// (no type); its guarding condition is not visible in this listing -
// presumably the case where newCollation is false, TODO confirm.
//
// NOTE(review): subtag and typeKeyword are fixed 1024-byte buffers filled by
// u_UCharsToChars with u_strlen + 1 and no visible length check - confirm that
// the tokenizer bounds the key length.
1042 static struct SResource 
* 
1043 parseCollationElements(ParseState
* state
, char *tag
, uint32_t startline
, UBool newCollation
, UErrorCode 
*status
) 
1045     TableResource  
*result 
= NULL
; 
1046     struct SResource  
*member 
= NULL
; 
1047     struct UString    
*tokenValue
; 
1048     struct UString     comment
; 
1049     enum   ETokenType  token
; 
1050     char               subtag
[1024], typeKeyword
[1024]; 
1053     result 
= table_open(state
->bundle
, tag
, NULL
, status
); 
1055     if (result 
== NULL 
|| U_FAILURE(*status
)) 
1060         printf(" collation elements %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1063         return addCollation(state
, result
, "(no type)", startline
, status
); 
1067             ustr_init(&comment
); 
1068             token 
= getToken(state
, &tokenValue
, &comment
, &line
, status
); 
1070             if (token 
== TOK_CLOSE_BRACE
) 
1075             if (token 
!= TOK_STRING
) 
1078                 *status 
= U_INVALID_FORMAT_ERROR
; 
1080                 if (token 
== TOK_EOF
) 
1082                     error(startline
, "unterminated table"); 
1086                     error(line
, "Unexpected token %s", tokenNames
[token
]); 
// Copy the key (collation type name) into the char buffer subtag,
// including its terminator.
1092             u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1); 
1094             if (U_FAILURE(*status
)) 
1100             if (uprv_strcmp(subtag
, "default") == 0) 
1102                 member 
= parseResource(state
, subtag
, NULL
, status
); 
1104                 if (U_FAILURE(*status
)) 
1110                 result
->add(member
, line
, *status
); 
// Look at the next token without consuming it to tell a collation table
// (open brace) from a typed resource (colon).
1114                 token 
= peekToken(state
, 0, &tokenValue
, &line
, &comment
, status
); 
1115                 /* this probably needs to be refactored or recursively use the parser */ 
1116                 /* first we assume that our collation table won't have the explicit type */ 
1117                 /* then, we cannot handle aliases */ 
1118                 if(token 
== TOK_OPEN_BRACE
) { 
1119                     token 
= getToken(state
, &tokenValue
, &comment
, &line
, status
); 
1120                     TableResource 
*collationRes
; 
1121                     if (keepCollationType(subtag
)) { 
1122                         collationRes 
= table_open(state
->bundle
, subtag
, NULL
, status
); 
1124                         collationRes 
= NULL
; 
1126                     // need to parse the collation data regardless 
1127                     collationRes 
= addCollation(state
, collationRes
, subtag
, startline
, status
); 
1128                     if (collationRes 
!= NULL
) { 
1129                         result
->add(collationRes
, startline
, *status
); 
1131                 } else if(token 
== TOK_COLON
) { /* right now, we'll just try to see if we have aliases */ 
1132                     /* we could have a table too */ 
1133                     token 
= peekToken(state
, 1, &tokenValue
, &line
, &comment
, status
); 
1134                     u_UCharsToChars(tokenValue
->fChars
, typeKeyword
, u_strlen(tokenValue
->fChars
) + 1); 
1135                     if(uprv_strcmp(typeKeyword
, "alias") == 0) { 
1136                         member 
= parseResource(state
, subtag
, NULL
, status
); 
1137                         if (U_FAILURE(*status
)) 
1143                         result
->add(member
, line
, *status
); 
1146                         *status 
= U_INVALID_FORMAT_ERROR
; 
1151                     *status 
= U_INVALID_FORMAT_ERROR
; 
1156             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 
1158             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 
1160             if (U_FAILURE(*status
)) 
// Fills an already opened table resource. Repeatedly reads a key token and
// parses the resource that follows with parseResource, adding each member to
// table. A closing brace is the terminating token; a warning about an empty
// table can be issued at that point. Any token other than a string key sets
// U_INVALID_FORMAT_ERROR (reported as an unterminated table on EOF). Keys
// must consist of invariant characters so they can be copied into the char
// buffer subtag.
// NOTE(review): the declaration of subtag is not visible in this listing;
// the copy below uses u_strlen + 1 with no visible bound check - confirm.
1169 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, 
1170    if this weren't special-cased, wouldn't be set until the entire file had been processed. */ 
1171 static struct SResource 
* 
1172 realParseTable(ParseState
* state
, TableResource 
*table
, char *tag
, uint32_t startline
, UErrorCode 
*status
) 
1174     struct SResource  
*member 
= NULL
; 
1175     struct UString    
*tokenValue
=NULL
; 
1176     struct UString    comment
; 
1177     enum   ETokenType token
; 
1180     UBool             readToken 
= FALSE
; 
1182     /* '{' . (name resource)* '}' */ 
1185         printf(" parsing table %s at line %i \n", (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1189         ustr_init(&comment
); 
1190         token 
= getToken(state
, &tokenValue
, &comment
, &line
, status
); 
1192         if (token 
== TOK_CLOSE_BRACE
) 
1195                 warning(startline
, "Encountered empty table"); 
1200         if (token 
!= TOK_STRING
) 
1202             *status 
= U_INVALID_FORMAT_ERROR
; 
1204             if (token 
== TOK_EOF
) 
1206                 error(startline
, "unterminated table"); 
1210                 error(line
, "unexpected token %s", tokenNames
[token
]); 
// Table keys are converted from UChars to chars, which is only possible for
// invariant characters; anything else is a format error.
1216         if(uprv_isInvariantUString(tokenValue
->fChars
, -1)) { 
1217             u_UCharsToChars(tokenValue
->fChars
, subtag
, u_strlen(tokenValue
->fChars
) + 1); 
1219             *status 
= U_INVALID_FORMAT_ERROR
; 
1220             error(line
, "invariant characters required for table keys"); 
1224         if (U_FAILURE(*status
)) 
1226             error(line
, "parse error. Stopped parsing tokens with %s", u_errorName(*status
)); 
1230         member 
= parseResource(state
, subtag
, &comment
, status
); 
1232         if (member 
== NULL 
|| U_FAILURE(*status
)) 
1234             error(line
, "parse error. Stopped parsing resource with %s", u_errorName(*status
)); 
1238         table
->add(member
, line
, *status
); 
1240         if (U_FAILURE(*status
)) 
1242             error(line
, "parse error. Stopped parsing table with %s", u_errorName(*status
)); 
1246         ustr_deinit(&comment
); 
1250     /* A compiler warning will appear if all paths don't contain a return statement. */ 
1251 /*     *status = U_INTERNAL_PROGRAM_ERROR; 
// Entry point for table resources. Tables tagged CollationElements or
// collations are routed to parseCollationElements (with newCollation FALSE
// and TRUE respectively). Every other table is opened with table_open and
// filled by realParseTable.
1255 static struct SResource 
* 
1256 parseTable(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString 
*comment
, UErrorCode 
*status
) 
1258     if (tag 
!= NULL 
&& uprv_strcmp(tag
, "CollationElements") == 0) 
1260         return parseCollationElements(state
, tag
, startline
, FALSE
, status
); 
1262     if (tag 
!= NULL 
&& uprv_strcmp(tag
, "collations") == 0) 
1264         return parseCollationElements(state
, tag
, startline
, TRUE
, status
); 
1267         printf(" table %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1270     TableResource 
*result 
= table_open(state
->bundle
, tag
, comment
, status
); 
1272     if (result 
== NULL 
|| U_FAILURE(*status
)) 
1276     return realParseTable(state
, result
, tag
, startline
,  status
); 
// Parses an array resource. Opens the array with array_open, then loops over
// members until the closing brace:
//   - a string token becomes a string member directly via string_open;
//   - any other token is parsed through parseResource with a NULL tag.
// A comma after a member is consumed when present. EOF before the closing
// brace sets U_INVALID_FORMAT_ERROR and reports an unterminated array.
// memberComments collects the comment text for each member and is reset to
// length 0 at the start of every iteration.
1279 static struct SResource 
* 
1280 parseArray(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString 
*comment
, UErrorCode 
*status
) 
1282     struct SResource  
*member 
= NULL
; 
1283     struct UString    
*tokenValue
; 
1284     struct UString    memberComments
; 
1285     enum   ETokenType token
; 
1286     UBool             readToken 
= FALSE
; 
1288     ArrayResource  
*result 
= array_open(state
->bundle
, tag
, comment
, status
); 
1290     if (result 
== NULL 
|| U_FAILURE(*status
)) 
1295         printf(" array %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1298     ustr_init(&memberComments
); 
1300     /* '{' . resource [','] '}' */ 
1304         ustr_setlen(&memberComments
, 0, status
); 
1306         /* check for end of array, but don't consume next token unless it really is the end */ 
1307         token 
= peekToken(state
, 0, &tokenValue
, NULL
, &memberComments
, status
); 
1310         if (token 
== TOK_CLOSE_BRACE
) 
1312             getToken(state
, NULL
, NULL
, NULL
, status
); 
1314                 warning(startline
, "Encountered empty array"); 
1319         if (token 
== TOK_EOF
) 
1322             *status 
= U_INVALID_FORMAT_ERROR
; 
1323             error(startline
, "unterminated array"); 
1327         /* string arrays are a special case */ 
1328         if (token 
== TOK_STRING
) 
1330             getToken(state
, &tokenValue
, &memberComments
, NULL
, status
); 
1331             member 
= string_open(state
->bundle
, NULL
, tokenValue
->fChars
, tokenValue
->fLength
, &memberComments
, status
); 
1335             member 
= parseResource(state
, NULL
, &memberComments
, status
); 
1338         if (member 
== NULL 
|| U_FAILURE(*status
)) 
1344         result
->add(member
); 
1346         /* eat optional comma if present */ 
1347         token 
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
); 
1349         if (token 
== TOK_COMMA
) 
1351             getToken(state
, NULL
, NULL
, NULL
, status
); 
1354         if (U_FAILURE(*status
)) 
1362     ustr_deinit(&memberComments
); 
// Parses an int vector resource. Each element is read as an invariant string
// with getInvariantString and converted with uprv_strtoul using base 0. An
// element is added to the vector only when the conversion consumed the whole
// string; otherwise status becomes U_INVALID_CHAR_FOUND. A comma after an
// element is consumed when present. An empty-vector warning can be issued
// when the closing brace is reached.
// NOTE(review): ownership of the string returned by getInvariantString is not
// visible in this listing - confirm it is freed on every path.
1366 static struct SResource 
* 
1367 parseIntVector(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString 
*comment
, UErrorCode 
*status
) 
1369     enum   ETokenType  token
; 
1372     UBool              readToken 
= FALSE
; 
1375     struct UString     memberComments
; 
1377     IntVectorResource 
*result 
= intvector_open(state
->bundle
, tag
, comment
, status
); 
1379     if (result 
== NULL 
|| U_FAILURE(*status
)) 
1385         printf(" vector %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1387     ustr_init(&memberComments
); 
1388     /* '{' . string [','] '}' */ 
1391         ustr_setlen(&memberComments
, 0, status
); 
1393         /* check for end of array, but don't consume next token unless it really is the end */ 
1394         token 
= peekToken(state
, 0, NULL
, NULL
,&memberComments
, status
); 
1396         if (token 
== TOK_CLOSE_BRACE
) 
1398             /* it's the end, consume the close brace */ 
1399             getToken(state
, NULL
, NULL
, NULL
, status
); 
1401                 warning(startline
, "Encountered empty int vector"); 
1403             ustr_deinit(&memberComments
); 
1407         string 
= getInvariantString(state
, NULL
, NULL
, status
); 
1409         if (U_FAILURE(*status
)) 
1415         /* For handling illegal char in the Intvector */ 
1416         value 
= uprv_strtoul(string
, &stopstring
, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ 
// len is the number of characters consumed by the conversion; it must equal
// the full string length for the element to be accepted.
1417         len
=(uint32_t)(stopstring
-string
); 
1419         if(len
==uprv_strlen(string
)) 
1421             result
->add(value
, *status
); 
1423             token 
= peekToken(state
, 0, NULL
, NULL
, NULL
, status
); 
1428             *status
=U_INVALID_CHAR_FOUND
; 
1431         if (U_FAILURE(*status
)) 
1437         /* the comma is optional (even though it is required to prevent the reader from concatenating 
1438         consecutive entries) so that a missing comma on the last entry isn't an error */ 
1439         if (token 
== TOK_COMMA
) 
1441             getToken(state
, NULL
, NULL
, NULL
, status
); 
1447     /* A compiler warning will appear if all paths don't contain a return statement. */ 
1448 /*    intvector_close(result, status); 
1449     *status = U_INTERNAL_PROGRAM_ERROR; 
// Parses a binary resource written as a string of hex digits. The string is
// read with getInvariantString and held in a LocalMemory owner. Characters
// are converted two at a time with uprv_strtoul in base 16 into a byte buffer
// allocated with count elements, and bin_open receives count >> 1 bytes.
// An odd length is rejected with U_INVALID_CHAR_FOUND; an empty value yields
// an empty binary resource after a warning.
1453 static struct SResource 
* 
1454 parseBinary(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString 
*comment
, UErrorCode 
*status
) 
1457     LocalMemory
<char> string(getInvariantString(state
, &line
, NULL
, status
)); 
1458     if (string
.isNull() || U_FAILURE(*status
)) 
1463     expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
1464     if (U_FAILURE(*status
)) 
1470         printf(" binary %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1473     uint32_t count 
= (uint32_t)uprv_strlen(string
.getAlias()); 
1476             LocalMemory
<uint8_t> value
; 
1477             if (value
.allocateInsteadAndCopy(count
) == NULL
) 
1479                 *status 
= U_MEMORY_ALLOCATION_ERROR
; 
// toConv holds one pair of hex digits plus a terminator for uprv_strtoul.
1483             char toConv
[3] = {'\0', '\0', '\0'}; 
1484             for (uint32_t i 
= 0; i 
< count
; i 
+= 2) 
1486                 toConv
[0] = string
[i
]; 
1487                 toConv
[1] = string
[i 
+ 1]; 
1490                 value
[i 
>> 1] = (uint8_t) uprv_strtoul(toConv
, &stopstring
, 16); 
1491                 uint32_t len
=(uint32_t)(stopstring
-toConv
); 
1495                     *status
=U_INVALID_CHAR_FOUND
; 
1500             return bin_open(state
->bundle
, tag
, count 
>> 1, value
.getAlias(), NULL
, comment
, status
); 
1504             *status 
= U_INVALID_CHAR_FOUND
; 
1505             error(line
, "Encountered invalid binary value (length is odd)"); 
1511         warning(startline
, "Encountered empty binary value"); 
1512         return bin_open(state
->bundle
, tag
, 0, NULL
, "", comment
, status
); 
// Parses an integer resource. The value text is read with getInvariantString
// and must be followed by a closing brace. It is converted with uprv_strtoul
// in base 0, and int_open is called only when the whole string was consumed;
// otherwise status is set to U_INVALID_CHAR_FOUND. An empty string triggers a
// warning that mentions a default value of 0.
// NOTE(review): the release of string is not visible in this listing -
// confirm it is freed on every path.
1516 static struct SResource 
* 
1517 parseInteger(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString 
*comment
, UErrorCode 
*status
) 
1519     struct SResource 
*result 
= NULL
; 
1525     string 
= getInvariantString(state
, NULL
, NULL
, status
); 
1527     if (string 
== NULL 
|| U_FAILURE(*status
)) 
1532     expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
1534     if (U_FAILURE(*status
)) 
1541         printf(" integer %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1544     if (uprv_strlen(string
) <= 0) 
1546         warning(startline
, "Encountered empty integer. Default value is 0."); 
1549     /* Allow integer support for hexadecimal, octal and decimal digits */ 
1550     /* and handle illegal characters in the integer */ 
1551     value 
= uprv_strtoul(string
, &stopstring
, 0); 
1552     len
=(uint32_t)(stopstring
-string
); 
1553     if(len
==uprv_strlen(string
)) 
1555         result 
= int_open(state
->bundle
, tag
, value
, comment
, status
); 
1559         *status
=U_INVALID_CHAR_FOUND
; 
// Parses an import resource: the value names a file whose raw bytes become a
// binary resource. The path is state->inputdir (when set) joined with the
// file name through CharString::appendPathPart. The file is opened in rb
// mode, its size is taken from T_FileStream_size, the contents are read into
// a LocalMemory buffer and passed to bin_open together with the full path.
// A file that cannot be opened sets U_FILE_ACCESS_ERROR.
// NOTE(review): the return value of T_FileStream_read is not checked (see the
// commented-out numRead), so a short read would go unnoticed.
1566 static struct SResource 
* 
1567 parseImport(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode 
*status
) 
1570     LocalMemory
<char> filename(getInvariantString(state
, &line
, NULL
, status
)); 
1571     if (U_FAILURE(*status
)) 
1576     expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
1578     if (U_FAILURE(*status
)) 
1584         printf(" import %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1587     /* Open the input file for reading */ 
1588     CharString fullname
; 
1589     if (state
->inputdir 
!= NULL
) { 
1590         fullname
.append(state
->inputdir
, *status
); 
1592     fullname
.appendPathPart(filename
.getAlias(), *status
); 
1593     if (U_FAILURE(*status
)) { 
1597     FileStream 
*file 
= T_FileStream_open(fullname
.data(), "rb"); 
1600         error(line
, "couldn't open input file %s", filename
.getAlias()); 
1601         *status 
= U_FILE_ACCESS_ERROR
; 
1605     int32_t len  
= T_FileStream_size(file
); 
1606     LocalMemory
<uint8_t> data
; 
1607     if(data
.allocateInsteadAndCopy(len
) == NULL
) 
1609         *status 
= U_MEMORY_ALLOCATION_ERROR
; 
1610         T_FileStream_close (file
); 
1614     /* int32_t numRead = */ T_FileStream_read(file
, data
.getAlias(), len
); 
1615     T_FileStream_close (file
); 
1617     return bin_open(state
->bundle
, tag
, len
, data
.getAlias(), fullname
.data(), comment
, status
); 
// Parses an include resource: the named file is opened through ucbuf_open and
// its UChar buffer becomes a string resource via string_open.
// fullname is assembled by hand as inputdir + optional U_FILE_SEP_CHAR +
// filename in a buffer of inputdirLength + count + 2 bytes; when inputdir is
// NULL the file name is used as is.
// NOTE(review): uprv_strlen(filename) runs before the U_FAILURE check that
// follows getInvariantString; if that call can return NULL on failure this is
// a NULL dereference - confirm.
// NOTE(review): on the ucbuf_open failure path no release of filename or
// fullname is visible in this listing - confirm whether that path leaks.
1620 static struct SResource 
* 
1621 parseInclude(ParseState
* state
, char *tag
, uint32_t startline
, const struct UString
* comment
, UErrorCode 
*status
) 
1623     struct SResource 
*result
; 
1627     UChar 
*pTarget     
= NULL
; 
1630     char     *fullname 
= NULL
; 
1632     const char* cp 
= NULL
; 
1633     const UChar
* uBuffer 
= NULL
; 
1635     filename 
= getInvariantString(state
, &line
, NULL
, status
); 
1636     count     
= (int32_t)uprv_strlen(filename
); 
1638     if (U_FAILURE(*status
)) 
1643     expect(state
, TOK_CLOSE_BRACE
, NULL
, NULL
, NULL
, status
); 
1645     if (U_FAILURE(*status
)) 
1647         uprv_free(filename
); 
1652         printf(" include %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
// Room for the directory, one separator, the file name and the terminator.
1655     fullname 
= (char *) uprv_malloc(state
->inputdirLength 
+ count 
+ 2); 
1657     if(fullname 
== NULL
) 
1659         *status 
= U_MEMORY_ALLOCATION_ERROR
; 
1660         uprv_free(filename
); 
1664     if(state
->inputdir
!=NULL
){ 
// Insert a separator only when inputdir does not already end with one.
1665         if (state
->inputdir
[state
->inputdirLength 
- 1] != U_FILE_SEP_CHAR
) 
1668             uprv_strcpy(fullname
, state
->inputdir
); 
1670             fullname
[state
->inputdirLength
]      = U_FILE_SEP_CHAR
; 
1671             fullname
[state
->inputdirLength 
+ 1] = '\0'; 
1673             uprv_strcat(fullname
, filename
); 
1677             uprv_strcpy(fullname
, state
->inputdir
); 
1678             uprv_strcat(fullname
, filename
); 
1681         uprv_strcpy(fullname
,filename
); 
1684     ucbuf 
= ucbuf_open(fullname
, &cp
,getShowWarning(),FALSE
,status
); 
1686     if (U_FAILURE(*status
)) { 
1687         error(line
, "couldn't open input file %s\n", filename
); 
1691     uBuffer 
= ucbuf_getBuffer(ucbuf
,&len
,status
); 
1692     result 
= string_open(state
->bundle
, tag
, uBuffer
, len
, comment
, status
); 
1698     uprv_free(filename
); 
1699     uprv_free(fullname
); 
// UChar string constants for the resource type keywords that may follow a
// colon. The third argument is the length of the literal. Each declaration
// has a matching U_STRING_INIT with the same literal and length later in the
// file; the two lists must be kept in sync.
// k_type_bin and k_type_int are short aliases that parseResourceType maps to
// the binary and integer types.
1708 U_STRING_DECL(k_type_string
,    "string",    6); 
1709 U_STRING_DECL(k_type_binary
,    "binary",    6); 
1710 U_STRING_DECL(k_type_bin
,       "bin",       3); 
1711 U_STRING_DECL(k_type_table
,     "table",     5); 
1712 U_STRING_DECL(k_type_table_no_fallback
,     "table(nofallback)",         17); 
1713 U_STRING_DECL(k_type_int
,       "int",       3); 
1714 U_STRING_DECL(k_type_integer
,   "integer",   7); 
1715 U_STRING_DECL(k_type_array
,     "array",     5); 
1716 U_STRING_DECL(k_type_alias
,     "alias",     5); 
1717 U_STRING_DECL(k_type_intvector
, "intvector", 9); 
1718 U_STRING_DECL(k_type_import
,    "import",    6); 
1719 U_STRING_DECL(k_type_include
,   "include",   7); 
1721 /* Various non-standard processing plugins that create one or more special resources. */ 
1722 U_STRING_DECL(k_type_plugin_uca_rules
,      "process(uca_rules)",        18); 
1723 U_STRING_DECL(k_type_plugin_collation
,      "process(collation)",        18); 
1724 U_STRING_DECL(k_type_plugin_transliterator
, "process(transliterator)",   23); 
1725 U_STRING_DECL(k_type_plugin_dependency
,     "process(dependency)",       19); 
// Resource type identifiers. parseResourceType steps through these values in
// order and uses them as indexes into gResourceTypes, so the enumerator order
// has to match the order of that table.
// NOTE(review): only some of the enumerators are visible in this listing.
1727 typedef enum EResourceType
 
1733     RESTYPE_TABLE_NO_FALLBACK
, 
1740     RESTYPE_PROCESS_UCA_RULES
, 
1741     RESTYPE_PROCESS_COLLATION
, 
1742     RESTYPE_PROCESS_TRANSLITERATOR
, 
1743     RESTYPE_PROCESS_DEPENDENCY
, 
// Table indexed by EResourceType. Each entry holds a debug name, the UChar
// keyword that parseResourceType compares against, and the parser function
// that parseResource dispatches to.
// An entry with a NULL parseFunction makes parseResource report an internal
// error, except table(nofallback), which parseResource rejects before the
// lookup because it is only valid at the top level.
1748     const char *nameChars
;   /* only used for debugging */ 
1749     const UChar 
*nameUChars
; 
1750     ParseResourceFunction 
*parseFunction
; 
1751 } gResourceTypes
[] = { 
1752     {"Unknown", NULL
, NULL
}, 
1753     {"string", k_type_string
, parseString
}, 
1754     {"binary", k_type_binary
, parseBinary
}, 
1755     {"table", k_type_table
, parseTable
}, 
1756     {"table(nofallback)", k_type_table_no_fallback
, NULL
}, /* parseFunction will never be called */ 
1757     {"integer", k_type_integer
, parseInteger
}, 
1758     {"array", k_type_array
, parseArray
}, 
1759     {"alias", k_type_alias
, parseAlias
}, 
1760     {"intvector", k_type_intvector
, parseIntVector
}, 
1761     {"import", k_type_import
, parseImport
}, 
1762     {"include", k_type_include
, parseInclude
}, 
1763     {"process(uca_rules)", k_type_plugin_uca_rules
, parseUCARules
}, 
1764     {"process(collation)", k_type_plugin_collation
, NULL 
/* not implemented yet */}, 
1765     {"process(transliterator)", k_type_plugin_transliterator
, parseTransliterator
}, 
1766     {"process(dependency)", k_type_plugin_dependency
, parseDependency
}, 
1767     {"reserved", NULL
, NULL
} 
// Initialization of the keyword constants declared with U_STRING_DECL earlier
// in the file. Literals and lengths repeat those declarations and must stay
// in sync with them.
// NOTE(review): the header of the enclosing function is not visible in this
// listing.
1772     U_STRING_INIT(k_type_string
,    "string",    6); 
1773     U_STRING_INIT(k_type_binary
,    "binary",    6); 
1774     U_STRING_INIT(k_type_bin
,       "bin",       3); 
1775     U_STRING_INIT(k_type_table
,     "table",     5); 
1776     U_STRING_INIT(k_type_table_no_fallback
,     "table(nofallback)",         17); 
1777     U_STRING_INIT(k_type_int
,       "int",       3); 
1778     U_STRING_INIT(k_type_integer
,   "integer",   7); 
1779     U_STRING_INIT(k_type_array
,     "array",     5); 
1780     U_STRING_INIT(k_type_alias
,     "alias",     5); 
1781     U_STRING_INIT(k_type_intvector
, "intvector", 9); 
1782     U_STRING_INIT(k_type_import
,    "import",    6); 
1783     U_STRING_INIT(k_type_include
,   "include",   7); 
1785     U_STRING_INIT(k_type_plugin_uca_rules
,      "process(uca_rules)",        18); 
1786     U_STRING_INIT(k_type_plugin_collation
,      "process(collation)",        18); 
1787     U_STRING_INIT(k_type_plugin_transliterator
, "process(transliterator)",   23); 
1788     U_STRING_INIT(k_type_plugin_dependency
,     "process(dependency)",       19); 
// Returns TRUE for both table flavours, RESTYPE_TABLE and
// RESTYPE_TABLE_NO_FALLBACK, and FALSE for every other resource type.
1791 static inline UBool 
isTable(enum EResourceType type
) { 
1792     return (UBool
)(type
==RESTYPE_TABLE 
|| type
==RESTYPE_TABLE_NO_FALLBACK
); 
// Reads the type keyword that follows a colon and maps it to an EResourceType.
// The keyword token is first compared against gResourceTypes[...].nameUChars
// for every value above RESTYPE_UNKNOWN and below RESTYPE_RESERVED, then
// against the aliases int (RESTYPE_INTEGER) and bin (RESTYPE_BINARY).
// An unmatched keyword sets U_INVALID_FORMAT_ERROR and reports the keyword
// text, truncated to fit the 1024-byte tokenBuffer.
// Returns RESTYPE_UNKNOWN when the expected string token cannot be read.
1795 static enum EResourceType
 
1796 parseResourceType(ParseState
* state
, UErrorCode 
*status
) 
1798     struct UString        
*tokenValue
; 
1799     struct UString        comment
; 
1800     enum   EResourceType  result 
= RESTYPE_UNKNOWN
; 
1802     ustr_init(&comment
); 
1803     expect(state
, TOK_STRING
, &tokenValue
, &comment
, &line
, status
); 
1805     if (U_FAILURE(*status
)) 
1807         return RESTYPE_UNKNOWN
; 
1810     *status 
= U_ZERO_ERROR
; 
1812     /* Search for normal types */ 
1813     result
=RESTYPE_UNKNOWN
; 
// The loop increments result before each comparison, so index 0 (Unknown,
// whose nameUChars is NULL) is never compared.
1814     while ((result
=(EResourceType
)(result
+1)) < RESTYPE_RESERVED
) { 
1815         if (u_strcmp(tokenValue
->fChars
, gResourceTypes
[result
].nameUChars
) == 0) { 
1819     /* Now search for the aliases */ 
1820     if (u_strcmp(tokenValue
->fChars
, k_type_int
) == 0) { 
1821         result 
= RESTYPE_INTEGER
; 
1823     else if (u_strcmp(tokenValue
->fChars
, k_type_bin
) == 0) { 
1824         result 
= RESTYPE_BINARY
; 
1826     else if (result 
== RESTYPE_RESERVED
) { 
1827         char tokenBuffer
[1024]; 
1828         u_austrncpy(tokenBuffer
, tokenValue
->fChars
, sizeof(tokenBuffer
)); 
// Force termination in case the keyword filled the whole buffer.
1829         tokenBuffer
[sizeof(tokenBuffer
) - 1] = 0; 
1830         *status 
= U_INVALID_FORMAT_ERROR
; 
1831         error(line
, "unknown resource type '%s'", tokenBuffer
); 
// Parses one non-top-level resource, starting right after its key.
// With an explicit colon the type comes from parseResourceType and an open
// brace must follow. Without a colon the type is guessed from up to two
// lookahead tokens, as listed in the comment inside the function.
// table(nofallback) is rejected here with U_INVALID_FORMAT_ERROR because it
// is only valid at the top bundle level.
// The parser for the resolved type is looked up in gResourceTypes; a NULL
// entry sets U_INTERNAL_PROGRAM_ERROR.
1837 /* parse a non-top-level resource */ 
1838 static struct SResource 
* 
1839 parseResource(ParseState
* state
, char *tag
, const struct UString 
*comment
, UErrorCode 
*status
) 
1841     enum   ETokenType      token
; 
1842     enum   EResourceType  resType 
= RESTYPE_UNKNOWN
; 
1843     ParseResourceFunction 
*parseFunction 
= NULL
; 
1844     struct UString        
*tokenValue
; 
1849     token 
= getToken(state
, &tokenValue
, NULL
, &startline
, status
); 
1852         printf(" resource %s at line %i \n",  (tag 
== NULL
) ? "(null)" : tag
, (int)startline
); 
1855     /* name . [ ':' type ] '{' resource '}' */ 
1856     /* This function parses from the colon onwards.  If the colon is present, parse the 
1857     type then try to parse a resource of that type.  If there is no explicit type, 
1858     work it out using the lookahead tokens. */ 
1862         *status 
= U_INVALID_FORMAT_ERROR
; 
1863         error(startline
, "Unexpected EOF encountered"); 
1867         *status 
= U_INVALID_FORMAT_ERROR
; 
1871         resType 
= parseResourceType(state
, status
); 
1872         expect(state
, TOK_OPEN_BRACE
, &tokenValue
, NULL
, &startline
, status
); 
1874         if (U_FAILURE(*status
)) 
1881     case TOK_OPEN_BRACE
: 
1885         *status 
= U_INVALID_FORMAT_ERROR
; 
1886         error(startline
, "syntax error while reading a resource, expected '{' or ':'"); 
1891     if (resType 
== RESTYPE_UNKNOWN
) 
1893         /* No explicit type, so try to work it out.  At this point, we've read the first '{'. 
1894         We could have any of the following: 
1895         { {         => array (nested) 
1897         { string ,  => string array 
1901         { string :/{    => table 
1902         { string }      => string 
1905         token 
= peekToken(state
, 0, NULL
, &line
, NULL
,status
); 
1907         if (U_FAILURE(*status
)) 
1912         if (token 
== TOK_OPEN_BRACE 
|| token 
== TOK_COLON 
||token 
==TOK_CLOSE_BRACE 
) 
1914             resType 
= RESTYPE_ARRAY
; 
1916         else if (token 
== TOK_STRING
) 
1918             token 
= peekToken(state
, 1, NULL
, &line
, NULL
, status
); 
1920             if (U_FAILURE(*status
)) 
1927             case TOK_COMMA
:         resType 
= RESTYPE_ARRAY
;  break; 
1928             case TOK_OPEN_BRACE
:    resType 
= RESTYPE_TABLE
;  break; 
1929             case TOK_CLOSE_BRACE
:   resType 
= RESTYPE_STRING
; break; 
1930             case TOK_COLON
:         resType 
= RESTYPE_TABLE
;  break; 
1932                 *status 
= U_INVALID_FORMAT_ERROR
; 
1933                 error(line
, "Unexpected token after string, expected ',', '{' or '}'"); 
1939             *status 
= U_INVALID_FORMAT_ERROR
; 
1940             error(line
, "Unexpected token after '{'"); 
1944         /* printf("Type guessed as %s\n", resourceNames[resType]); */ 
1945     } else if(resType 
== RESTYPE_TABLE_NO_FALLBACK
) { 
1946         *status 
= U_INVALID_FORMAT_ERROR
; 
1947         error(startline
, "error: %s resource type not valid except on top bundle level", gResourceTypes
[resType
].nameChars
); 
1952     /* We should now know what we need to parse next, so call the appropriate parser 
1953     function and return. */ 
1954     parseFunction 
= gResourceTypes
[resType
].parseFunction
; 
1955     if (parseFunction 
!= NULL
) { 
1956         return parseFunction(state
, tag
, startline
, comment
, status
); 
1959         *status 
= U_INTERNAL_PROGRAM_ERROR
; 
1960         error(startline
, "internal error: %s resource type found and not handled", gResourceTypes
[resType
].nameChars
); 
// Parses a complete resource bundle from buf and returns state.bundle.
// Steps visible in this listing:
//   1. initialize the lookahead slots and copy the input/output directories,
//      file name and collation options into the parse state;
//   2. read the first string token and create the SRBRoot, using that token
//      as the locale;
//   3. accept either a colon followed by a table type (table or
//      table(nofallback)) and an open brace, or an open brace alone; anything
//      else sets U_PARSE_ERROR;
//   4. for table(nofallback) set fNoFallback on the bundle;
//   5. fill the root table with realParseTable and, when dependencyArray is
//      set, add it to the root table and reset it to NULL;
//   6. warn when tokens remain after the bundle.
// The bundle is deleted on the visible failure paths.
// NOTE(review): dependencyArray is defined outside this function and its
// ownership rules are not visible here.
1966 /* parse the top-level resource */ 
1968 parse(UCHARBUF 
*buf
, const char *inputDir
, const char *outputDir
, const char *filename
, 
1969       UBool makeBinaryCollation
, UBool omitCollationRules
, UErrorCode 
*status
) 
1971     struct UString    
*tokenValue
; 
1972     struct UString    comment
; 
1974     enum EResourceType bundleType
; 
1975     enum ETokenType    token
; 
1980     for (i 
= 0; i 
< MAX_LOOKAHEAD 
+ 1; i
++) 
1982         ustr_init(&state
.lookahead
[i
].value
); 
1983         ustr_init(&state
.lookahead
[i
].comment
); 
1986     initLookahead(&state
, buf
, status
); 
1988     state
.inputdir       
= inputDir
; 
1989     state
.inputdirLength 
= (state
.inputdir 
!= NULL
) ? (uint32_t)uprv_strlen(state
.inputdir
) : 0; 
1990     state
.outputdir       
= outputDir
; 
1991     state
.outputdirLength 
= (state
.outputdir 
!= NULL
) ? (uint32_t)uprv_strlen(state
.outputdir
) : 0; 
1992     state
.filename 
= filename
; 
1993     state
.makeBinaryCollation 
= makeBinaryCollation
; 
1994     state
.omitCollationRules 
= omitCollationRules
; 
1996     ustr_init(&comment
); 
1997     expect(&state
, TOK_STRING
, &tokenValue
, &comment
, NULL
, status
); 
1999     state
.bundle 
= new SRBRoot(&comment
, FALSE
, *status
); 
2001     if (state
.bundle 
== NULL 
|| U_FAILURE(*status
)) 
2003         delete state
.bundle
; 
2009     state
.bundle
->setLocale(tokenValue
->fChars
, *status
); 
2011     /* The following code makes an empty bundle work with or without the :table specifier */ 
2012     token 
= getToken(&state
, NULL
, NULL
, &line
, status
); 
2013     if(token
==TOK_COLON
) { 
2014         *status
=U_ZERO_ERROR
; 
2015         bundleType
=parseResourceType(&state
, status
); 
2017         if(isTable(bundleType
)) 
2019             expect(&state
, TOK_OPEN_BRACE
, NULL
, NULL
, &line
, status
); 
2023             *status
=U_PARSE_ERROR
; 
2024              error(line
, "parse error. Stopped parsing with %s", u_errorName(*status
)); 
2030         if(token
==TOK_OPEN_BRACE
) 
2032             *status
=U_ZERO_ERROR
; 
2033             bundleType
=RESTYPE_TABLE
; 
2037             /* neither colon nor open brace */ 
2038             *status
=U_PARSE_ERROR
; 
2039             bundleType
=RESTYPE_UNKNOWN
; 
2040             error(line
, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status
)); 
2044     if (U_FAILURE(*status
)) 
2046         delete state
.bundle
; 
2050     if(bundleType
==RESTYPE_TABLE_NO_FALLBACK
) { 
2052          * Parse a top-level table with the table(nofallback) declaration. 
2053          * This is the same as a regular table, but also sets the 
2054          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . 
2056         state
.bundle
->fNoFallback
=TRUE
; 
2058     /* top-level tables need not handle special table names like "collations" */ 
2059     assert(!state
.bundle
->fIsPoolBundle
); 
2060     assert(state
.bundle
->fRoot
->fType 
== URES_TABLE
); 
2061     TableResource 
*rootTable 
= static_cast<TableResource 
*>(state
.bundle
->fRoot
); 
2062     realParseTable(&state
, rootTable
, NULL
, line
, status
); 
2063     if(dependencyArray
!=NULL
){ 
2064         rootTable
->add(dependencyArray
, 0, *status
); 
2065         dependencyArray 
= NULL
; 
2067    if (U_FAILURE(*status
)) 
2069         delete state
.bundle
; 
2070         res_close(dependencyArray
); 
2074     if (getToken(&state
, NULL
, NULL
, &line
, status
) != TOK_EOF
) 
2076         warning(line
, "extraneous text after resource bundle (perhaps unmatched braces)"); 
2078             *status 
= U_INVALID_FORMAT_ERROR
; 
2083     cleanupLookahead(&state
); 
2084     ustr_deinit(&comment
); 
2085     return state
.bundle
;