]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/parse.c
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.c
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
729e4ab9 4* Copyright (C) 1998-2010, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
9* File parse.c
10*
11* Modification History:
12*
13* Date Name Description
14* 05/26/99 stephen Creation.
15* 02/25/00 weiv Overhaul to write udata
16* 5/10/01 Ram removed ustdio dependency
17* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
21#include "ucol_imp.h"
22#include "parse.h"
23#include "errmsg.h"
24#include "uhash.h"
25#include "cmemory.h"
26#include "cstring.h"
374ca955 27#include "uinvchar.h"
b75a7d8f
A
28#include "read.h"
29#include "ustr.h"
30#include "reslist.h"
73c04bcf 31#include "rbt_pars.h"
729e4ab9 32#include "genrb.h"
b75a7d8f 33#include "unicode/ustring.h"
729e4ab9 34#include "unicode/uscript.h"
b75a7d8f 35#include "unicode/putil.h"
73c04bcf 36#include <stdio.h>
b75a7d8f 37
729e4ab9
A
38extern UBool gIncludeUnihanColl;
39
b75a7d8f 40/* Number of tokens to read ahead of the current stream position */
374ca955 41#define MAX_LOOKAHEAD 3
b75a7d8f 42
b75a7d8f
A
43#define CR 0x000D
44#define LF 0x000A
45#define SPACE 0x0020
374ca955 46#define TAB 0x0009
b75a7d8f
A
47#define ESCAPE 0x005C
48#define HASH 0x0023
49#define QUOTE 0x0027
73c04bcf 50#define ZERO 0x0030
b75a7d8f
A
51#define STARTCOMMAND 0x005B
52#define ENDCOMMAND 0x005D
73c04bcf
A
53#define OPENSQBRACKET 0x005B
54#define CLOSESQBRACKET 0x005D
b75a7d8f 55
b75a7d8f
A
56struct Lookahead
57{
58 enum ETokenType type;
374ca955
A
59 struct UString value;
60 struct UString comment;
61 uint32_t line;
b75a7d8f
A
62};
63
64/* keep in sync with token defines in read.h */
374ca955 65const char *tokenNames[TOK_TOKEN_COUNT] =
b75a7d8f
A
66{
67 "string", /* A string token, such as "MonthNames" */
68 "'{'", /* An opening brace character */
69 "'}'", /* A closing brace character */
70 "','", /* A comma */
71 "':'", /* A colon */
72
73 "<end of file>", /* End of the file has been reached successfully */
374ca955 74 "<end of line>"
b75a7d8f
A
75};
76
77/* Just to store "TRUE" */
78static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
79
729e4ab9
A
80typedef struct {
81 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
82 uint32_t lookaheadPosition;
83 UCHARBUF *buffer;
84 struct SRBRoot *bundle;
85 const char *inputdir;
86 uint32_t inputdirLength;
87 const char *outputdir;
88 uint32_t outputdirLength;
89} ParseState;
b75a7d8f
A
90
91static UBool gMakeBinaryCollation = TRUE;
46f4442e 92static UBool gOmitCollationRules = FALSE;
b75a7d8f 93
729e4ab9
A
94typedef struct SResource *
95ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
96
97static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
b75a7d8f 98
b75a7d8f
A
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL, NULL, NULL, status);       bad - value is now a different string
*/
112static void
729e4ab9 113initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
b75a7d8f
A
114{
115 static uint32_t initTypeStrings = 0;
116 uint32_t i;
117
118 if (!initTypeStrings)
119 {
120 initTypeStrings = 1;
121 }
122
729e4ab9
A
123 state->lookaheadPosition = 0;
124 state->buffer = buf;
b75a7d8f
A
125
126 resetLineNumber();
127
128 for (i = 0; i < MAX_LOOKAHEAD; i++)
129 {
729e4ab9 130 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
131 if (U_FAILURE(*status))
132 {
133 return;
134 }
135 }
136
137 *status = U_ZERO_ERROR;
138}
139
46f4442e 140static void
729e4ab9 141cleanupLookahead(ParseState* state)
46f4442e
A
142{
143 uint32_t i;
144 for (i = 0; i < MAX_LOOKAHEAD; i++)
145 {
729e4ab9
A
146 ustr_deinit(&state->lookahead[i].value);
147 ustr_deinit(&state->lookahead[i].comment);
46f4442e
A
148 }
149
150}
151
b75a7d8f 152static enum ETokenType
729e4ab9 153getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
154{
155 enum ETokenType result;
156 uint32_t i;
157
729e4ab9 158 result = state->lookahead[state->lookaheadPosition].type;
b75a7d8f
A
159
160 if (tokenValue != NULL)
161 {
729e4ab9 162 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
b75a7d8f
A
163 }
164
165 if (linenumber != NULL)
166 {
729e4ab9 167 *linenumber = state->lookahead[state->lookaheadPosition].line;
b75a7d8f
A
168 }
169
374ca955
A
170 if (comment != NULL)
171 {
729e4ab9 172 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
173 }
174
729e4ab9
A
175 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
176 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
177 ustr_setlen(&state->lookahead[i].comment, 0, status);
178 ustr_setlen(&state->lookahead[i].value, 0, status);
179 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
180
181 /* printf("getToken, returning %s\n", tokenNames[result]); */
182
183 return result;
184}
185
186static enum ETokenType
729e4ab9 187peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
b75a7d8f 188{
729e4ab9 189 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
b75a7d8f
A
190
191 if (U_FAILURE(*status))
192 {
193 return TOK_ERROR;
194 }
195
196 if (lookaheadCount >= MAX_LOOKAHEAD)
197 {
198 *status = U_INTERNAL_PROGRAM_ERROR;
199 return TOK_ERROR;
200 }
201
202 if (tokenValue != NULL)
203 {
729e4ab9 204 *tokenValue = &state->lookahead[i].value;
b75a7d8f
A
205 }
206
207 if (linenumber != NULL)
208 {
729e4ab9 209 *linenumber = state->lookahead[i].line;
b75a7d8f
A
210 }
211
374ca955 212 if(comment != NULL){
729e4ab9 213 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
214 }
215
729e4ab9 216 return state->lookahead[i].type;
b75a7d8f
A
217}
218
219static void
729e4ab9 220expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
221{
222 uint32_t line;
374ca955 223
729e4ab9 224 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
b75a7d8f 225
73c04bcf 226 if (linenumber != NULL)
b75a7d8f 227 {
73c04bcf 228 *linenumber = line;
b75a7d8f
A
229 }
230
73c04bcf 231 if (U_FAILURE(*status))
b75a7d8f 232 {
73c04bcf 233 return;
b75a7d8f
A
234 }
235
236 if (token != expectedToken)
237 {
238 *status = U_INVALID_FORMAT_ERROR;
239 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
240 }
46f4442e 241 else
b75a7d8f
A
242 {
243 *status = U_ZERO_ERROR;
244 }
245}
246
729e4ab9 247static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
b75a7d8f
A
248{
249 struct UString *tokenValue;
250 char *result;
251 uint32_t count;
252
729e4ab9 253 expect(state, TOK_STRING, &tokenValue, comment, line, status);
b75a7d8f
A
254
255 if (U_FAILURE(*status))
256 {
257 return NULL;
258 }
259
374ca955
A
260 count = u_strlen(tokenValue->fChars);
261 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
262 *status = U_INVALID_FORMAT_ERROR;
263 error(*line, "invariant characters required for table keys, binary data, etc.");
264 return NULL;
265 }
266
267 result = uprv_malloc(count+1);
b75a7d8f
A
268
269 if (result == NULL)
270 {
271 *status = U_MEMORY_ALLOCATION_ERROR;
272 return NULL;
273 }
274
374ca955 275 u_UCharsToChars(tokenValue->fChars, result, count+1);
b75a7d8f
A
276 return result;
277}
278
b75a7d8f 279static struct SResource *
729e4ab9 280parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
281{
282 struct SResource *result = NULL;
283 struct UString *tokenValue;
284 FileStream *file = NULL;
285 char filename[256] = { '\0' };
286 char cs[128] = { '\0' };
287 uint32_t line;
288 int len=0;
289 UBool quoted = FALSE;
290 UCHARBUF *ucbuf=NULL;
291 UChar32 c = 0;
292 const char* cp = NULL;
293 UChar *pTarget = NULL;
294 UChar *target = NULL;
295 UChar *targetLimit = NULL;
296 int32_t size = 0;
297
729e4ab9 298 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
b75a7d8f
A
299
300 if(isVerbose()){
374ca955 301 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
302 }
303
304 if (U_FAILURE(*status))
305 {
306 return NULL;
307 }
308 /* make the filename including the directory */
729e4ab9 309 if (state->inputdir != NULL)
b75a7d8f 310 {
729e4ab9 311 uprv_strcat(filename, state->inputdir);
b75a7d8f 312
729e4ab9 313 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
314 {
315 uprv_strcat(filename, U_FILE_SEP_STRING);
316 }
317 }
318
319 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
320
729e4ab9 321 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
322
323 if (U_FAILURE(*status))
324 {
325 return NULL;
326 }
b75a7d8f
A
327 uprv_strcat(filename, cs);
328
46f4442e
A
329 if(gOmitCollationRules) {
330 return res_none();
331 }
b75a7d8f
A
332
333 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
334
335 if (U_FAILURE(*status)) {
336 error(line, "An error occured while opening the input file %s\n", filename);
337 return NULL;
338 }
339
340 /* We allocate more space than actually required
341 * since the actual size needed for storing UChars
342 * is not known in UTF-8 byte stream
343 */
73c04bcf 344 size = ucbuf_size(ucbuf) + 1;
b75a7d8f 345 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
374ca955 346 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
b75a7d8f
A
347 target = pTarget;
348 targetLimit = pTarget+size;
349
350 /* read the rules into the buffer */
351 while (target < targetLimit)
352 {
353 c = ucbuf_getc(ucbuf, status);
354 if(c == QUOTE) {
73c04bcf 355 quoted = (UBool)!quoted;
b75a7d8f
A
356 }
357 /* weiv (06/26/2002): adding the following:
358 * - preserving spaces in commands [...]
359 * - # comments until the end of line
360 */
374ca955 361 if (c == STARTCOMMAND && !quoted)
b75a7d8f 362 {
73c04bcf
A
363 /* preserve commands
364 * closing bracket will be handled by the
365 * append at the end of the loop
366 */
367 while(c != ENDCOMMAND) {
368 U_APPEND_CHAR32(c, target,len);
369 c = ucbuf_getc(ucbuf, status);
370 }
371 }
372 else if (c == HASH && !quoted) {
373 /* skip comments */
374 while(c != CR && c != LF) {
375 c = ucbuf_getc(ucbuf, status);
376 }
377 continue;
378 }
379 else if (c == ESCAPE)
b75a7d8f
A
380 {
381 c = unescape(ucbuf, status);
382
383 if (c == U_ERR)
384 {
385 uprv_free(pTarget);
386 T_FileStream_close(file);
387 return NULL;
388 }
389 }
374ca955 390 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
b75a7d8f 391 {
73c04bcf
A
392 /* ignore spaces carriage returns
393 * and line feed unless in the form \uXXXX
b75a7d8f
A
394 */
395 continue;
396 }
397
398 /* Append UChar * after dissembling if c > 0xffff*/
399 if (c != U_EOF)
400 {
401 U_APPEND_CHAR32(c, target,len);
402 }
403 else
404 {
405 break;
406 }
407 }
408
374ca955
A
409 /* terminate the string */
410 if(target < targetLimit){
411 *target = 0x0000;
412 }
413
729e4ab9 414 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
374ca955 415
b75a7d8f
A
416
417 ucbuf_close(ucbuf);
418 uprv_free(pTarget);
419 T_FileStream_close(file);
420
421 return result;
422}
423
73c04bcf 424static struct SResource *
729e4ab9 425parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
73c04bcf
A
426{
427 struct SResource *result = NULL;
428 struct UString *tokenValue;
429 FileStream *file = NULL;
430 char filename[256] = { '\0' };
431 char cs[128] = { '\0' };
432 uint32_t line;
433 UCHARBUF *ucbuf=NULL;
434 const char* cp = NULL;
435 UChar *pTarget = NULL;
436 const UChar *pSource = NULL;
437 int32_t size = 0;
438
729e4ab9 439 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
440
441 if(isVerbose()){
442 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
443 }
444
445 if (U_FAILURE(*status))
446 {
447 return NULL;
448 }
449 /* make the filename including the directory */
729e4ab9 450 if (state->inputdir != NULL)
73c04bcf 451 {
729e4ab9 452 uprv_strcat(filename, state->inputdir);
73c04bcf 453
729e4ab9 454 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
455 {
456 uprv_strcat(filename, U_FILE_SEP_STRING);
457 }
458 }
459
460 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
461
729e4ab9 462 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
463
464 if (U_FAILURE(*status))
465 {
466 return NULL;
467 }
468 uprv_strcat(filename, cs);
469
470
471 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
472
473 if (U_FAILURE(*status)) {
474 error(line, "An error occured while opening the input file %s\n", filename);
475 return NULL;
476 }
477
478 /* We allocate more space than actually required
479 * since the actual size needed for storing UChars
480 * is not known in UTF-8 byte stream
481 */
482 pSource = ucbuf_getBuffer(ucbuf, &size, status);
483 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
484 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
485
486#if !UCONFIG_NO_TRANSLITERATION
487 size = utrans_stripRules(pSource, size, pTarget, status);
488#else
46f4442e 489 size = 0;
73c04bcf
A
490 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
491#endif
729e4ab9 492 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
73c04bcf
A
493
494 ucbuf_close(ucbuf);
495 uprv_free(pTarget);
496 T_FileStream_close(file);
497
498 return result;
499}
500static struct SResource* dependencyArray = NULL;
501
502static struct SResource *
729e4ab9 503parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
73c04bcf
A
504{
505 struct SResource *result = NULL;
506 struct SResource *elem = NULL;
507 struct UString *tokenValue;
508 uint32_t line;
509 char filename[256] = { '\0' };
510 char cs[128] = { '\0' };
511
729e4ab9 512 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
513
514 if(isVerbose()){
515 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
516 }
517
518 if (U_FAILURE(*status))
519 {
520 return NULL;
521 }
522 /* make the filename including the directory */
729e4ab9 523 if (state->outputdir != NULL)
73c04bcf 524 {
729e4ab9 525 uprv_strcat(filename, state->outputdir);
73c04bcf 526
729e4ab9 527 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
528 {
529 uprv_strcat(filename, U_FILE_SEP_STRING);
530 }
531 }
532
533 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
534
535 if (U_FAILURE(*status))
536 {
537 return NULL;
538 }
539 uprv_strcat(filename, cs);
540 if(!T_FileStream_file_exists(filename)){
541 if(isStrict()){
542 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
543 }else{
544 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
545 }
546 }
547 if(dependencyArray==NULL){
729e4ab9 548 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
73c04bcf
A
549 }
550 if(tag!=NULL){
729e4ab9 551 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf 552 }
729e4ab9 553 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf
A
554
555 array_add(dependencyArray, elem, status);
556
557 if (U_FAILURE(*status))
558 {
559 return NULL;
560 }
729e4ab9 561 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
562 return result;
563}
b75a7d8f 564static struct SResource *
729e4ab9 565parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
566{
567 struct UString *tokenValue;
568 struct SResource *result = NULL;
569
73c04bcf 570/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
b75a7d8f
A
571 {
572 return parseUCARules(tag, startline, status);
73c04bcf 573 }*/
b75a7d8f 574 if(isVerbose()){
374ca955 575 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 576 }
729e4ab9 577 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
578
579 if (U_SUCCESS(*status))
580 {
581 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
582 doesn't survive expect either) */
583
729e4ab9 584 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 585 if(U_SUCCESS(*status) && result) {
729e4ab9 586 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f 587
46f4442e
A
588 if (U_FAILURE(*status))
589 {
590 res_close(result);
591 return NULL;
592 }
b75a7d8f
A
593 }
594 }
595
596 return result;
597}
598
599static struct SResource *
729e4ab9 600parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
601{
602 struct UString *tokenValue;
374ca955 603 struct SResource *result = NULL;
b75a7d8f 604
729e4ab9 605 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
606
607 if(isVerbose()){
374ca955 608 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
609 }
610
611 if (U_SUCCESS(*status))
612 {
613 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
614 doesn't survive expect either) */
615
729e4ab9 616 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 617
729e4ab9 618 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
619
620 if (U_FAILURE(*status))
621 {
46f4442e 622 res_close(result);
b75a7d8f
A
623 return NULL;
624 }
625 }
626
627 return result;
628}
629
729e4ab9
A
/* Context passed to importFromDataFile: the directories handed on to parse(). */
typedef struct{
    const char* inputDir;   /* directory imported locale files are looked up in (may be NULL) */
    const char* outputDir;  /* output directory, passed through to parse() */
} GenrbData;
634
635static struct SResource* resLookup(struct SResource* res, const char* key){
636 struct SResource *current = NULL;
637 struct SResTable *list;
638 if (res == res_none()) {
639 return NULL;
640 }
641
642 list = &(res->u.fTable);
643
644 current = list->fFirst;
645 while (current != NULL) {
646 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
647 return current;
648 }
649 current = current->fNext;
650 }
651 return NULL;
652}
653
654static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
655 struct SRBRoot *data = NULL;
656 UCHARBUF *ucbuf = NULL;
657 GenrbData* genrbdata = (GenrbData*) context;
658 int localeLength = strlen(locale);
659 char* filename = (char*)uprv_malloc(localeLength+5);
660 char *inputDirBuf = NULL;
661 char *openFileName = NULL;
662 const char* cp = "";
663 UChar* urules = NULL;
664 int32_t urulesLength = 0;
665 int32_t i = 0;
666 int32_t dirlen = 0;
667 int32_t filelen = 0;
668 struct SResource* root;
669 struct SResource* collations;
670 struct SResource* collation;
671 struct SResource* sequence;
672
673 memcpy(filename, locale, localeLength);
674 for(i = 0; i < localeLength; i++){
675 if(filename[i] == '-'){
676 filename[i] = '_';
677 }
678 }
679 filename[localeLength] = '.';
680 filename[localeLength+1] = 't';
681 filename[localeLength+2] = 'x';
682 filename[localeLength+3] = 't';
683 filename[localeLength+4] = 0;
684
685
686 if (status==NULL || U_FAILURE(*status)) {
687 return NULL;
688 }
689 if(filename==NULL){
690 *status=U_ILLEGAL_ARGUMENT_ERROR;
691 return NULL;
692 }else{
693 filelen = (int32_t)uprv_strlen(filename);
694 }
695 if(genrbdata->inputDir == NULL) {
696 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
697 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
698 openFileName[0] = '\0';
699 if (filenameBegin != NULL) {
700 /*
701 * When a filename ../../../data/root.txt is specified,
702 * we presume that the input directory is ../../../data
703 * This is very important when the resource file includes
704 * another file, like UCARules.txt or thaidict.brk.
705 */
706 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
707 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
708
709 /* test for NULL */
710 if(inputDirBuf == NULL) {
711 *status = U_MEMORY_ALLOCATION_ERROR;
712 goto finish;
713 }
714
715 inputDirBuf[filenameSize - 1] = 0;
716 genrbdata->inputDir = inputDirBuf;
717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
718 }
719 }else{
720 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
721
722 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
723 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
724
725 /* test for NULL */
726 if(openFileName == NULL) {
727 *status = U_MEMORY_ALLOCATION_ERROR;
728 goto finish;
729 }
730
731 openFileName[0] = '\0';
732 /*
733 * append the input dir to openFileName if the first char in
734 * filename is not file seperation char and the last char input directory is not '.'.
735 * This is to support :
736 * genrb -s. /home/icu/data
737 * genrb -s. icu/data
738 * The user cannot mix notations like
739 * genrb -s. /icu/data --- the absolute path specified. -s redundant
740 * user should use
741 * genrb -s. icu/data --- start from CWD and look in icu/data dir
742 */
743 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
744 uprv_strcpy(openFileName, genrbdata->inputDir);
745 openFileName[dirlen] = U_FILE_SEP_CHAR;
746 }
747 openFileName[dirlen + 1] = '\0';
748 } else {
749 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
750
751 /* test for NULL */
752 if(openFileName == NULL) {
753 *status = U_MEMORY_ALLOCATION_ERROR;
754 goto finish;
755 }
756
757 uprv_strcpy(openFileName, genrbdata->inputDir);
758
759 }
760 }
761 uprv_strcat(openFileName, filename);
762 /* printf("%s\n", openFileName); */
763 *status = U_ZERO_ERROR;
764 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
765
766 if(*status == U_FILE_ACCESS_ERROR) {
767
768 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
769 goto finish;
770 }
771 if (ucbuf == NULL || U_FAILURE(*status)) {
772 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
773 goto finish;
774 }
775
776 /* Parse the data into an SRBRoot */
777 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, status);
778
779 root = data->fRoot;
780 collations = resLookup(root, "collations");
781 collation = resLookup(collations, type);
782 sequence = resLookup(collation, "Sequence");
783 urules = sequence->u.fString.fChars;
784 urulesLength = sequence->u.fString.fLength;
785 *pLength = urulesLength;
786
787finish:
788
789 if (inputDirBuf != NULL) {
790 uprv_free(inputDirBuf);
791 }
792
793 if (openFileName != NULL) {
794 uprv_free(openFileName);
795 }
796
797 if(ucbuf) {
798 ucbuf_close(ucbuf);
799 }
800
801 return urules;
802}
803
b75a7d8f 804static struct SResource *
729e4ab9 805addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
b75a7d8f 806{
b75a7d8f
A
807 struct SResource *member = NULL;
808 struct UString *tokenValue;
374ca955 809 struct UString comment;
b75a7d8f
A
810 enum ETokenType token;
811 char subtag[1024];
812 UVersionInfo version;
813 UBool override = FALSE;
814 uint32_t line;
729e4ab9 815 GenrbData genrbdata;
b75a7d8f 816 /* '{' . (name resource)* '}' */
374ca955
A
817 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
818
b75a7d8f
A
819 for (;;)
820 {
374ca955 821 ustr_init(&comment);
729e4ab9 822 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
823
824 if (token == TOK_CLOSE_BRACE)
825 {
826 return result;
827 }
828
829 if (token != TOK_STRING)
830 {
46f4442e 831 res_close(result);
b75a7d8f
A
832 *status = U_INVALID_FORMAT_ERROR;
833
834 if (token == TOK_EOF)
835 {
836 error(startline, "unterminated table");
837 }
838 else
839 {
840 error(line, "Unexpected token %s", tokenNames[token]);
841 }
842
843 return NULL;
844 }
845
846 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
847
848 if (U_FAILURE(*status))
849 {
46f4442e 850 res_close(result);
b75a7d8f
A
851 return NULL;
852 }
853
729e4ab9 854 member = parseResource(state, subtag, NULL, status);
b75a7d8f
A
855
856 if (U_FAILURE(*status))
857 {
46f4442e 858 res_close(result);
b75a7d8f
A
859 return NULL;
860 }
861
862 if (uprv_strcmp(subtag, "Version") == 0)
863 {
864 char ver[40];
865 int32_t length = member->u.fString.fLength;
866
867 if (length >= (int32_t) sizeof(ver))
868 {
869 length = (int32_t) sizeof(ver) - 1;
870 }
871
872 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
873 u_versionFromString(version, ver);
874
875 table_add(result, member, line, status);
876
877 }
878 else if (uprv_strcmp(subtag, "Override") == 0)
879 {
880 override = FALSE;
881
882 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
883 {
884 override = TRUE;
885 }
886 table_add(result, member, line, status);
887
888 }
889 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
890 {
891 /* discard duplicate %%CollationBin if any*/
892 }
893 else if (uprv_strcmp(subtag, "Sequence") == 0)
894 {
729e4ab9
A
895#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
896 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
b75a7d8f 897#else
b75a7d8f
A
898 if(gMakeBinaryCollation) {
899 UErrorCode intStatus = U_ZERO_ERROR;
900
901 /* do the collation elements */
902 int32_t len = 0;
903 uint8_t *data = NULL;
904 UCollator *coll = NULL;
729e4ab9
A
905 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
906 uint32_t reorderCodeCount;
907 int32_t reorderCodeIndex;
b75a7d8f 908 UParseError parseError;
b75a7d8f 909
729e4ab9
A
910 genrbdata.inputDir = state->inputdir;
911 genrbdata.outputDir = state->outputdir;
912
913 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
914 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
b75a7d8f
A
915
916 if (U_SUCCESS(intStatus) && coll != NULL)
917 {
374ca955
A
918 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
919 data = (uint8_t *)uprv_malloc(len);
73c04bcf 920 intStatus = U_ZERO_ERROR;
374ca955
A
921 len = ucol_cloneBinary(coll, data, len, &intStatus);
922 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
b75a7d8f
A
923
924 /* tailoring rules version */
925 /* This is wrong! */
926 /*coll->dataInfo.dataVersion[1] = version[0];*/
927 /* Copy tailoring version. Builder version already */
928 /* set in ucol_openRules */
929 ((UCATableHeader *)data)->version[1] = version[0];
930 ((UCATableHeader *)data)->version[2] = version[1];
931 ((UCATableHeader *)data)->version[3] = version[2];
932
933 if (U_SUCCESS(intStatus) && data != NULL)
934 {
729e4ab9
A
935 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
936 table_add(result, collationBin, line, status);
b75a7d8f 937 uprv_free(data);
729e4ab9
A
938
939 reorderCodeCount = ucol_getReorderCodes(
940 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
941 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
942 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
943 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
944 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
945 }
946 table_add(result, reorderCodeRes, line, status);
947 }
b75a7d8f
A
948 }
949 else
950 {
951 warning(line, "could not obtain rules from collator");
952 if(isStrict()){
953 *status = U_INVALID_FORMAT_ERROR;
954 return NULL;
955 }
956 }
957
958 ucol_close(coll);
959 }
960 else
961 {
729e4ab9
A
962 if(intStatus == U_FILE_ACCESS_ERROR) {
963 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
964 *status = intStatus;
965 return NULL;
966 }
b75a7d8f
A
967 warning(line, "%%Collation could not be constructed from CollationElements - check context!");
968 if(isStrict()){
374ca955 969 *status = intStatus;
b75a7d8f
A
970 return NULL;
971 }
972 }
973 } else {
974 if(isVerbose()) {
975 printf("Not building Collation binary\n");
976 }
977 }
978#endif
729e4ab9
A
979 /* in order to achieve smaller data files, we can direct genrb */
980 /* to omit collation rules */
981 if(gOmitCollationRules) {
982 bundle_closeString(state->bundle, member);
983 } else {
984 table_add(result, member, line, status);
985 }
b75a7d8f
A
986 }
987
988 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
989
990 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
991
992 if (U_FAILURE(*status))
993 {
46f4442e 994 res_close(result);
b75a7d8f
A
995 return NULL;
996 }
997 }
998
999 /* not reached */
1000 /* A compiler warning will appear if all paths don't contain a return statement. */
1001/* *status = U_INTERNAL_PROGRAM_ERROR;
1002 return NULL;*/
1003}
1004
374ca955 1005static struct SResource *
729e4ab9 1006parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
374ca955
A
1007{
1008 struct SResource *result = NULL;
1009 struct SResource *member = NULL;
1010 struct SResource *collationRes = NULL;
1011 struct UString *tokenValue;
1012 struct UString comment;
1013 enum ETokenType token;
1014 char subtag[1024], typeKeyword[1024];
1015 uint32_t line;
1016
729e4ab9 1017 result = table_open(state->bundle, tag, NULL, status);
374ca955
A
1018
1019 if (result == NULL || U_FAILURE(*status))
1020 {
1021 return NULL;
1022 }
1023 if(isVerbose()){
1024 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1025 }
1026 if(!newCollation) {
729e4ab9 1027 return addCollation(state, result, startline, status);
73c04bcf
A
1028 }
1029 else {
1030 for(;;) {
1031 ustr_init(&comment);
729e4ab9 1032 token = getToken(state, &tokenValue, &comment, &line, status);
374ca955 1033
73c04bcf 1034 if (token == TOK_CLOSE_BRACE)
374ca955 1035 {
73c04bcf 1036 return result;
374ca955 1037 }
73c04bcf
A
1038
1039 if (token != TOK_STRING)
374ca955 1040 {
46f4442e 1041 res_close(result);
73c04bcf
A
1042 *status = U_INVALID_FORMAT_ERROR;
1043
1044 if (token == TOK_EOF)
1045 {
1046 error(startline, "unterminated table");
1047 }
1048 else
1049 {
1050 error(line, "Unexpected token %s", tokenNames[token]);
1051 }
1052
1053 return NULL;
374ca955
A
1054 }
1055
73c04bcf 1056 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
374ca955 1057
73c04bcf
A
1058 if (U_FAILURE(*status))
1059 {
46f4442e 1060 res_close(result);
73c04bcf
A
1061 return NULL;
1062 }
374ca955 1063
73c04bcf
A
1064 if (uprv_strcmp(subtag, "default") == 0)
1065 {
729e4ab9 1066 member = parseResource(state, subtag, NULL, status);
374ca955 1067
73c04bcf
A
1068 if (U_FAILURE(*status))
1069 {
46f4442e 1070 res_close(result);
73c04bcf
A
1071 return NULL;
1072 }
374ca955 1073
73c04bcf
A
1074 table_add(result, member, line, status);
1075 }
1076 else
1077 {
729e4ab9 1078 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
73c04bcf
A
1079 /* this probably needs to be refactored or recursively use the parser */
1080 /* first we assume that our collation table won't have the explicit type */
1081 /* then, we cannot handle aliases */
1082 if(token == TOK_OPEN_BRACE) {
729e4ab9
A
1083 token = getToken(state, &tokenValue, &comment, &line, status);
1084 collationRes = table_open(state->bundle, subtag, NULL, status);
1085 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1086 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1087 table_add(result, collationRes, startline, status);
1088 }
73c04bcf
A
1089 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1090 /* we could have a table too */
729e4ab9 1091 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
73c04bcf
A
1092 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1093 if(uprv_strcmp(typeKeyword, "alias") == 0) {
729e4ab9 1094 member = parseResource(state, subtag, NULL, status);
73c04bcf
A
1095
1096 if (U_FAILURE(*status))
1097 {
46f4442e 1098 res_close(result);
73c04bcf
A
1099 return NULL;
1100 }
374ca955 1101
73c04bcf
A
1102 table_add(result, member, line, status);
1103 } else {
46f4442e 1104 res_close(result);
73c04bcf
A
1105 *status = U_INVALID_FORMAT_ERROR;
1106 return NULL;
1107 }
1108 } else {
46f4442e 1109 res_close(result);
73c04bcf
A
1110 *status = U_INVALID_FORMAT_ERROR;
1111 return NULL;
1112 }
374ca955 1113 }
374ca955 1114
73c04bcf 1115 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
374ca955 1116
73c04bcf 1117 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
374ca955 1118
73c04bcf
A
1119 if (U_FAILURE(*status))
1120 {
46f4442e 1121 res_close(result);
73c04bcf
A
1122 return NULL;
1123 }
374ca955 1124 }
374ca955
A
1125 }
1126}
1127
b75a7d8f
A
1128/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1129 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1130static struct SResource *
729e4ab9 1131realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
b75a7d8f
A
1132{
1133 struct SResource *member = NULL;
1134 struct UString *tokenValue=NULL;
374ca955 1135 struct UString comment;
b75a7d8f
A
1136 enum ETokenType token;
1137 char subtag[1024];
1138 uint32_t line;
1139 UBool readToken = FALSE;
1140
1141 /* '{' . (name resource)* '}' */
1142 if(isVerbose()){
374ca955 1143 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1144 }
1145 for (;;)
1146 {
374ca955 1147 ustr_init(&comment);
729e4ab9 1148 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
1149
1150 if (token == TOK_CLOSE_BRACE)
1151 {
1152 if (!readToken) {
1153 warning(startline, "Encountered empty table");
1154 }
1155 return table;
1156 }
1157
1158 if (token != TOK_STRING)
1159 {
b75a7d8f
A
1160 *status = U_INVALID_FORMAT_ERROR;
1161
1162 if (token == TOK_EOF)
1163 {
1164 error(startline, "unterminated table");
1165 }
1166 else
1167 {
374ca955 1168 error(line, "unexpected token %s", tokenNames[token]);
b75a7d8f
A
1169 }
1170
1171 return NULL;
1172 }
1173
374ca955
A
1174 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1175 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1176 } else {
1177 *status = U_INVALID_FORMAT_ERROR;
1178 error(line, "invariant characters required for table keys");
374ca955
A
1179 return NULL;
1180 }
b75a7d8f
A
1181
1182 if (U_FAILURE(*status))
1183 {
729e4ab9 1184 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
b75a7d8f
A
1185 return NULL;
1186 }
1187
729e4ab9 1188 member = parseResource(state, subtag, &comment, status);
b75a7d8f
A
1189
1190 if (member == NULL || U_FAILURE(*status))
1191 {
729e4ab9 1192 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
b75a7d8f
A
1193 return NULL;
1194 }
1195
1196 table_add(table, member, line, status);
1197
1198 if (U_FAILURE(*status))
1199 {
729e4ab9 1200 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
b75a7d8f
A
1201 return NULL;
1202 }
1203 readToken = TRUE;
46f4442e 1204 ustr_deinit(&comment);
b75a7d8f
A
1205 }
1206
1207 /* not reached */
1208 /* A compiler warning will appear if all paths don't contain a return statement. */
1209/* *status = U_INTERNAL_PROGRAM_ERROR;
1210 return NULL;*/
1211}
1212
1213static struct SResource *
729e4ab9 1214parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1215{
1216 struct SResource *result;
1217
1218 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1219 {
729e4ab9 1220 return parseCollationElements(state, tag, startline, FALSE, status);
374ca955
A
1221 }
1222 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1223 {
729e4ab9 1224 return parseCollationElements(state, tag, startline, TRUE, status);
b75a7d8f
A
1225 }
1226 if(isVerbose()){
374ca955 1227 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1228 }
374ca955 1229
729e4ab9 1230 result = table_open(state->bundle, tag, comment, status);
b75a7d8f
A
1231
1232 if (result == NULL || U_FAILURE(*status))
1233 {
1234 return NULL;
1235 }
1236
729e4ab9 1237 return realParseTable(state, result, tag, startline, status);
b75a7d8f
A
1238}
1239
1240static struct SResource *
729e4ab9 1241parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1242{
1243 struct SResource *result = NULL;
1244 struct SResource *member = NULL;
1245 struct UString *tokenValue;
374ca955 1246 struct UString memberComments;
b75a7d8f
A
1247 enum ETokenType token;
1248 UBool readToken = FALSE;
1249
729e4ab9 1250 result = array_open(state->bundle, tag, comment, status);
b75a7d8f
A
1251
1252 if (result == NULL || U_FAILURE(*status))
1253 {
1254 return NULL;
1255 }
1256 if(isVerbose()){
374ca955 1257 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1258 }
374ca955
A
1259
1260 ustr_init(&memberComments);
1261
b75a7d8f
A
1262 /* '{' . resource [','] '}' */
1263 for (;;)
1264 {
374ca955
A
1265 /* reset length */
1266 ustr_setlen(&memberComments, 0, status);
1267
b75a7d8f 1268 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1269 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
374ca955 1270
b75a7d8f
A
1271
1272 if (token == TOK_CLOSE_BRACE)
1273 {
729e4ab9 1274 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1275 if (!readToken) {
1276 warning(startline, "Encountered empty array");
1277 }
1278 break;
1279 }
1280
1281 if (token == TOK_EOF)
1282 {
46f4442e 1283 res_close(result);
b75a7d8f
A
1284 *status = U_INVALID_FORMAT_ERROR;
1285 error(startline, "unterminated array");
1286 return NULL;
1287 }
1288
1289 /* string arrays are a special case */
1290 if (token == TOK_STRING)
1291 {
729e4ab9
A
1292 getToken(state, &tokenValue, &memberComments, NULL, status);
1293 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
b75a7d8f
A
1294 }
1295 else
1296 {
729e4ab9 1297 member = parseResource(state, NULL, &memberComments, status);
b75a7d8f
A
1298 }
1299
1300 if (member == NULL || U_FAILURE(*status))
1301 {
46f4442e 1302 res_close(result);
b75a7d8f
A
1303 return NULL;
1304 }
1305
1306 array_add(result, member, status);
1307
1308 if (U_FAILURE(*status))
1309 {
46f4442e 1310 res_close(result);
b75a7d8f
A
1311 return NULL;
1312 }
1313
1314 /* eat optional comma if present */
729e4ab9 1315 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1316
1317 if (token == TOK_COMMA)
1318 {
729e4ab9 1319 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1320 }
1321
1322 if (U_FAILURE(*status))
1323 {
46f4442e 1324 res_close(result);
b75a7d8f
A
1325 return NULL;
1326 }
1327 readToken = TRUE;
1328 }
1329
46f4442e 1330 ustr_deinit(&memberComments);
b75a7d8f
A
1331 return result;
1332}
1333
1334static struct SResource *
729e4ab9 1335parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1336{
1337 struct SResource *result = NULL;
1338 enum ETokenType token;
1339 char *string;
1340 int32_t value;
1341 UBool readToken = FALSE;
b75a7d8f
A
1342 char *stopstring;
1343 uint32_t len;
374ca955 1344 struct UString memberComments;
b75a7d8f 1345
729e4ab9 1346 result = intvector_open(state->bundle, tag, comment, status);
b75a7d8f
A
1347
1348 if (result == NULL || U_FAILURE(*status))
1349 {
1350 return NULL;
1351 }
1352
1353 if(isVerbose()){
374ca955 1354 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1355 }
374ca955 1356 ustr_init(&memberComments);
b75a7d8f
A
1357 /* '{' . string [','] '}' */
1358 for (;;)
1359 {
374ca955
A
1360 ustr_setlen(&memberComments, 0, status);
1361
b75a7d8f 1362 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1363 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
b75a7d8f
A
1364
1365 if (token == TOK_CLOSE_BRACE)
1366 {
1367 /* it's the end, consume the close brace */
729e4ab9 1368 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1369 if (!readToken) {
1370 warning(startline, "Encountered empty int vector");
1371 }
46f4442e 1372 ustr_deinit(&memberComments);
b75a7d8f
A
1373 return result;
1374 }
1375
729e4ab9 1376 string = getInvariantString(state, NULL, NULL, status);
b75a7d8f
A
1377
1378 if (U_FAILURE(*status))
1379 {
46f4442e 1380 res_close(result);
b75a7d8f
A
1381 return NULL;
1382 }
b75a7d8f 1383
46f4442e 1384 /* For handling illegal char in the Intvector */
b75a7d8f 1385 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
374ca955 1386 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1387
1388 if(len==uprv_strlen(string))
1389 {
1390 intvector_add(result, value, status);
1391 uprv_free(string);
729e4ab9 1392 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1393 }
1394 else
1395 {
1396 uprv_free(string);
1397 *status=U_INVALID_CHAR_FOUND;
1398 }
b75a7d8f
A
1399
1400 if (U_FAILURE(*status))
1401 {
46f4442e 1402 res_close(result);
b75a7d8f
A
1403 return NULL;
1404 }
1405
1406 /* the comma is optional (even though it is required to prevent the reader from concatenating
1407 consecutive entries) so that a missing comma on the last entry isn't an error */
1408 if (token == TOK_COMMA)
1409 {
729e4ab9 1410 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1411 }
1412 readToken = TRUE;
1413 }
1414
1415 /* not reached */
1416 /* A compiler warning will appear if all paths don't contain a return statement. */
1417/* intvector_close(result, status);
1418 *status = U_INTERNAL_PROGRAM_ERROR;
1419 return NULL;*/
1420}
1421
1422static struct SResource *
729e4ab9 1423parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1424{
1425 struct SResource *result = NULL;
1426 uint8_t *value;
1427 char *string;
1428 char toConv[3] = {'\0', '\0', '\0'};
1429 uint32_t count;
1430 uint32_t i;
1431 uint32_t line;
b75a7d8f
A
1432 char *stopstring;
1433 uint32_t len;
1434
729e4ab9 1435 string = getInvariantString(state, &line, NULL, status);
b75a7d8f
A
1436
1437 if (string == NULL || U_FAILURE(*status))
1438 {
1439 return NULL;
1440 }
1441
729e4ab9 1442 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1443
1444 if (U_FAILURE(*status))
1445 {
1446 uprv_free(string);
1447 return NULL;
1448 }
1449
1450 if(isVerbose()){
374ca955 1451 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1452 }
1453
374ca955 1454 count = (uint32_t)uprv_strlen(string);
b75a7d8f
A
1455 if (count > 0){
1456 if((count % 2)==0){
1457 value = uprv_malloc(sizeof(uint8_t) * count);
1458
1459 if (value == NULL)
1460 {
1461 uprv_free(string);
1462 *status = U_MEMORY_ALLOCATION_ERROR;
1463 return NULL;
1464 }
1465
1466 for (i = 0; i < count; i += 2)
1467 {
1468 toConv[0] = string[i];
1469 toConv[1] = string[i + 1];
1470
1471 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
374ca955 1472 len=(uint32_t)(stopstring-toConv);
b75a7d8f
A
1473
1474 if(len!=uprv_strlen(toConv))
1475 {
1476 uprv_free(string);
1477 *status=U_INVALID_CHAR_FOUND;
1478 return NULL;
1479 }
1480 }
1481
729e4ab9 1482 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
b75a7d8f
A
1483
1484 uprv_free(value);
1485 }
1486 else
1487 {
1488 *status = U_INVALID_CHAR_FOUND;
1489 uprv_free(string);
1490 error(line, "Encountered invalid binary string");
1491 return NULL;
1492 }
1493 }
1494 else
1495 {
729e4ab9 1496 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
b75a7d8f
A
1497 warning(startline, "Encountered empty binary tag");
1498 }
1499 uprv_free(string);
1500
1501 return result;
1502}
1503
1504static struct SResource *
729e4ab9 1505parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1506{
1507 struct SResource *result = NULL;
1508 int32_t value;
1509 char *string;
b75a7d8f
A
1510 char *stopstring;
1511 uint32_t len;
1512
729e4ab9 1513 string = getInvariantString(state, NULL, NULL, status);
b75a7d8f
A
1514
1515 if (string == NULL || U_FAILURE(*status))
1516 {
1517 return NULL;
1518 }
1519
729e4ab9 1520 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1521
1522 if (U_FAILURE(*status))
1523 {
1524 uprv_free(string);
1525 return NULL;
1526 }
1527
1528 if(isVerbose()){
374ca955 1529 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1530 }
1531
1532 if (uprv_strlen(string) <= 0)
1533 {
1534 warning(startline, "Encountered empty integer. Default value is 0.");
1535 }
1536
46f4442e
A
1537 /* Allow integer support for hexdecimal, octal digit and decimal*/
1538 /* and handle illegal char in the integer*/
b75a7d8f 1539 value = uprv_strtoul(string, &stopstring, 0);
374ca955 1540 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1541 if(len==uprv_strlen(string))
1542 {
729e4ab9 1543 result = int_open(state->bundle, tag, value, comment, status);
b75a7d8f
A
1544 }
1545 else
1546 {
1547 *status=U_INVALID_CHAR_FOUND;
1548 }
1549 uprv_free(string);
1550
1551 return result;
1552}
1553
1554static struct SResource *
729e4ab9 1555parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1556{
1557 struct SResource *result;
1558 FileStream *file;
1559 int32_t len;
1560 uint8_t *data;
1561 char *filename;
1562 uint32_t line;
1563 char *fullname = NULL;
1564 int32_t numRead = 0;
729e4ab9 1565 filename = getInvariantString(state, &line, NULL, status);
b75a7d8f
A
1566
1567 if (U_FAILURE(*status))
1568 {
1569 return NULL;
1570 }
1571
729e4ab9 1572 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1573
1574 if (U_FAILURE(*status))
1575 {
1576 uprv_free(filename);
1577 return NULL;
1578 }
1579
1580 if(isVerbose()){
374ca955 1581 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1582 }
1583
1584 /* Open the input file for reading */
729e4ab9 1585 if (state->inputdir == NULL)
b75a7d8f 1586 {
46f4442e
A
1587#if 1
1588 /*
1589 * Always save file file name, even if there's
1590 * no input directory specified. MIGHT BREAK SOMETHING
1591 */
1592 int32_t filenameLength = uprv_strlen(filename);
1593
1594 fullname = (char *) uprv_malloc(filenameLength + 1);
1595 uprv_strcpy(fullname, filename);
1596#endif
1597
b75a7d8f
A
1598 file = T_FileStream_open(filename, "rb");
1599 }
1600 else
1601 {
1602
374ca955 1603 int32_t count = (int32_t)uprv_strlen(filename);
b75a7d8f 1604
729e4ab9 1605 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f 1606 {
729e4ab9 1607 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
b75a7d8f
A
1608
1609 /* test for NULL */
1610 if(fullname == NULL)
1611 {
1612 *status = U_MEMORY_ALLOCATION_ERROR;
1613 return NULL;
1614 }
1615
729e4ab9 1616 uprv_strcpy(fullname, state->inputdir);
b75a7d8f 1617
729e4ab9
A
1618 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1619 fullname[state->inputdirLength + 1] = '\0';
b75a7d8f
A
1620
1621 uprv_strcat(fullname, filename);
1622 }
1623 else
1624 {
729e4ab9 1625 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
b75a7d8f
A
1626
1627 /* test for NULL */
1628 if(fullname == NULL)
1629 {
1630 *status = U_MEMORY_ALLOCATION_ERROR;
1631 return NULL;
1632 }
1633
729e4ab9 1634 uprv_strcpy(fullname, state->inputdir);
b75a7d8f
A
1635 uprv_strcat(fullname, filename);
1636 }
1637
1638 file = T_FileStream_open(fullname, "rb");
1639
1640 }
1641
1642 if (file == NULL)
1643 {
1644 error(line, "couldn't open input file %s", filename);
1645 *status = U_FILE_ACCESS_ERROR;
1646 return NULL;
1647 }
1648
1649 len = T_FileStream_size(file);
1650 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1651 /* test for NULL */
1652 if(data == NULL)
1653 {
1654 *status = U_MEMORY_ALLOCATION_ERROR;
1655 T_FileStream_close (file);
1656 return NULL;
1657 }
1658
1659 numRead = T_FileStream_read (file, data, len);
1660 T_FileStream_close (file);
1661
729e4ab9 1662 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
b75a7d8f
A
1663
1664 uprv_free(data);
1665 uprv_free(filename);
1666 uprv_free(fullname);
1667
1668 return result;
1669}
1670
1671static struct SResource *
729e4ab9 1672parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1673{
1674 struct SResource *result;
1675 int32_t len=0;
1676 char *filename;
1677 uint32_t line;
1678 UChar *pTarget = NULL;
1679
1680 UCHARBUF *ucbuf;
1681 char *fullname = NULL;
1682 int32_t count = 0;
1683 const char* cp = NULL;
1684 const UChar* uBuffer = NULL;
1685
729e4ab9 1686 filename = getInvariantString(state, &line, NULL, status);
374ca955 1687 count = (int32_t)uprv_strlen(filename);
b75a7d8f
A
1688
1689 if (U_FAILURE(*status))
1690 {
1691 return NULL;
1692 }
1693
729e4ab9 1694 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1695
1696 if (U_FAILURE(*status))
1697 {
1698 uprv_free(filename);
1699 return NULL;
1700 }
1701
1702 if(isVerbose()){
374ca955 1703 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1704 }
1705
729e4ab9 1706 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
b75a7d8f
A
1707 /* test for NULL */
1708 if(fullname == NULL)
1709 {
1710 *status = U_MEMORY_ALLOCATION_ERROR;
1711 uprv_free(filename);
1712 return NULL;
374ca955 1713 }
b75a7d8f 1714
729e4ab9
A
1715 if(state->inputdir!=NULL){
1716 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
1717 {
1718
729e4ab9 1719 uprv_strcpy(fullname, state->inputdir);
b75a7d8f 1720
729e4ab9
A
1721 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1722 fullname[state->inputdirLength + 1] = '\0';
b75a7d8f
A
1723
1724 uprv_strcat(fullname, filename);
1725 }
1726 else
1727 {
729e4ab9 1728 uprv_strcpy(fullname, state->inputdir);
b75a7d8f
A
1729 uprv_strcat(fullname, filename);
1730 }
1731 }else{
1732 uprv_strcpy(fullname,filename);
1733 }
1734
1735 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1736
1737 if (U_FAILURE(*status)) {
1738 error(line, "couldn't open input file %s\n", filename);
1739 return NULL;
1740 }
1741
1742 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
729e4ab9 1743 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
b75a7d8f
A
1744
1745 uprv_free(pTarget);
1746
1747 uprv_free(filename);
1748 uprv_free(fullname);
1749
1750 return result;
1751}
1752
73c04bcf
A
1753
1754
1755
1756
1757U_STRING_DECL(k_type_string, "string", 6);
1758U_STRING_DECL(k_type_binary, "binary", 6);
1759U_STRING_DECL(k_type_bin, "bin", 3);
1760U_STRING_DECL(k_type_table, "table", 5);
1761U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1762U_STRING_DECL(k_type_int, "int", 3);
1763U_STRING_DECL(k_type_integer, "integer", 7);
1764U_STRING_DECL(k_type_array, "array", 5);
1765U_STRING_DECL(k_type_alias, "alias", 5);
1766U_STRING_DECL(k_type_intvector, "intvector", 9);
1767U_STRING_DECL(k_type_import, "import", 6);
1768U_STRING_DECL(k_type_include, "include", 7);
1769U_STRING_DECL(k_type_reserved, "reserved", 8);
1770
1771/* Various non-standard processing plugins that create one or more special resources. */
1772U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1773U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1774U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1775U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1776
/*
 * Resource types known to the parser. The numeric value of each enumerator is
 * used as an index into gResourceTypes[], so the order must match that table.
 */
typedef enum EResourceType
{
    RT_UNKNOWN                = 0,
    RT_STRING                 = 1,
    RT_BINARY                 = 2,
    RT_TABLE                  = 3,
    RT_TABLE_NO_FALLBACK      = 4,
    RT_INTEGER                = 5,
    RT_ARRAY                  = 6,
    RT_ALIAS                  = 7,
    RT_INTVECTOR              = 8,
    RT_IMPORT                 = 9,
    RT_INCLUDE                = 10,
    RT_PROCESS_UCA_RULES      = 11,
    RT_PROCESS_COLLATION      = 12,
    RT_PROCESS_TRANSLITERATOR = 13,
    RT_PROCESS_DEPENDENCY     = 14,
    RT_RESERVED               = 15
} EResourceType;
1796
1797static struct {
1798 const char *nameChars; /* only used for debugging */
1799 const UChar *nameUChars;
1800 ParseResourceFunction *parseFunction;
1801} gResourceTypes[] = {
1802 {"Unknown", NULL, NULL},
1803 {"string", k_type_string, parseString},
1804 {"binary", k_type_binary, parseBinary},
1805 {"table", k_type_table, parseTable},
1806 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1807 {"integer", k_type_integer, parseInteger},
1808 {"array", k_type_array, parseArray},
1809 {"alias", k_type_alias, parseAlias},
1810 {"intvector", k_type_intvector, parseIntVector},
1811 {"import", k_type_import, parseImport},
1812 {"include", k_type_include, parseInclude},
1813 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1814 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1815 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1816 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1817 {"reserved", NULL, NULL}
1818};
1819
46f4442e 1820void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
73c04bcf 1821{
73c04bcf
A
1822 U_STRING_INIT(k_type_string, "string", 6);
1823 U_STRING_INIT(k_type_binary, "binary", 6);
1824 U_STRING_INIT(k_type_bin, "bin", 3);
1825 U_STRING_INIT(k_type_table, "table", 5);
1826 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1827 U_STRING_INIT(k_type_int, "int", 3);
1828 U_STRING_INIT(k_type_integer, "integer", 7);
1829 U_STRING_INIT(k_type_array, "array", 5);
1830 U_STRING_INIT(k_type_alias, "alias", 5);
1831 U_STRING_INIT(k_type_intvector, "intvector", 9);
1832 U_STRING_INIT(k_type_import, "import", 6);
1833 U_STRING_INIT(k_type_reserved, "reserved", 8);
1834 U_STRING_INIT(k_type_include, "include", 7);
1835
1836 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1837 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1838 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1839 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
729e4ab9 1840
46f4442e
A
1841 gMakeBinaryCollation = !omitBinaryCollation;
1842 gOmitCollationRules = omitCollationRules;
73c04bcf
A
1843}
1844
1845static U_INLINE UBool isTable(enum EResourceType type) {
1846 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1847}
1848
1849static enum EResourceType
729e4ab9 1850parseResourceType(ParseState* state, UErrorCode *status)
73c04bcf
A
1851{
1852 struct UString *tokenValue;
1853 struct UString comment;
1854 enum EResourceType result = RT_UNKNOWN;
1855 uint32_t line=0;
1856 ustr_init(&comment);
729e4ab9 1857 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
73c04bcf
A
1858
1859 if (U_FAILURE(*status))
1860 {
1861 return RT_UNKNOWN;
1862 }
1863
1864 *status = U_ZERO_ERROR;
1865
1866 /* Search for normal types */
1867 result=RT_UNKNOWN;
1868 while (++result < RT_RESERVED) {
1869 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1870 break;
1871 }
1872 }
1873 /* Now search for the aliases */
1874 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1875 result = RT_INTEGER;
1876 }
1877 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1878 result = RT_BINARY;
1879 }
1880 else if (result == RT_RESERVED) {
1881 char tokenBuffer[1024];
1882 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1883 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1884 *status = U_INVALID_FORMAT_ERROR;
1885 error(line, "unknown resource type '%s'", tokenBuffer);
1886 }
1887
1888 return result;
1889}
1890
1891/* parse a non-top-level resource */
b75a7d8f 1892static struct SResource *
729e4ab9 1893parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1894{
1895 enum ETokenType token;
1896 enum EResourceType resType = RT_UNKNOWN;
73c04bcf 1897 ParseResourceFunction *parseFunction = NULL;
b75a7d8f
A
1898 struct UString *tokenValue;
1899 uint32_t startline;
1900 uint32_t line;
1901
729e4ab9 1902 token = getToken(state, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1903
1904 if(isVerbose()){
374ca955 1905 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1906 }
1907
1908 /* name . [ ':' type ] '{' resource '}' */
1909 /* This function parses from the colon onwards. If the colon is present, parse the
1910 type then try to parse a resource of that type. If there is no explicit type,
1911 work it out using the lookahead tokens. */
1912 switch (token)
1913 {
1914 case TOK_EOF:
1915 *status = U_INVALID_FORMAT_ERROR;
1916 error(startline, "Unexpected EOF encountered");
1917 return NULL;
1918
1919 case TOK_ERROR:
1920 *status = U_INVALID_FORMAT_ERROR;
1921 return NULL;
1922
1923 case TOK_COLON:
729e4ab9
A
1924 resType = parseResourceType(state, status);
1925 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1926
1927 if (U_FAILURE(*status))
1928 {
1929 return NULL;
1930 }
1931
1932 break;
1933
1934 case TOK_OPEN_BRACE:
1935 break;
1936
1937 default:
1938 *status = U_INVALID_FORMAT_ERROR;
1939 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1940 return NULL;
1941 }
1942
1943 if (resType == RT_UNKNOWN)
1944 {
1945 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1946 We could have any of the following:
1947 { { => array (nested)
1948 { :/} => array
1949 { string , => string array
1950
b75a7d8f
A
1951 { string { => table
1952
b75a7d8f
A
1953 { string :/{ => table
1954 { string } => string
1955 */
1956
729e4ab9 1957 token = peekToken(state, 0, NULL, &line, NULL,status);
b75a7d8f
A
1958
1959 if (U_FAILURE(*status))
1960 {
1961 return NULL;
1962 }
1963
b75a7d8f
A
1964 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1965 {
1966 resType = RT_ARRAY;
1967 }
1968 else if (token == TOK_STRING)
1969 {
729e4ab9 1970 token = peekToken(state, 1, NULL, &line, NULL, status);
b75a7d8f
A
1971
1972 if (U_FAILURE(*status))
1973 {
1974 return NULL;
1975 }
1976
1977 switch (token)
1978 {
1979 case TOK_COMMA: resType = RT_ARRAY; break;
1980 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
1981 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
b75a7d8f
A
1982 case TOK_COLON: resType = RT_TABLE; break;
1983 default:
1984 *status = U_INVALID_FORMAT_ERROR;
1985 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1986 return NULL;
1987 }
1988 }
1989 else
1990 {
1991 *status = U_INVALID_FORMAT_ERROR;
1992 error(line, "Unexpected token after '{'");
1993 return NULL;
1994 }
1995
1996 /* printf("Type guessed as %s\n", resourceNames[resType]); */
73c04bcf
A
1997 } else if(resType == RT_TABLE_NO_FALLBACK) {
1998 *status = U_INVALID_FORMAT_ERROR;
1999 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2000 return NULL;
b75a7d8f
A
2001 }
2002
2003 /* We should now know what we need to parse next, so call the appropriate parser
2004 function and return. */
73c04bcf
A
2005 parseFunction = gResourceTypes[resType].parseFunction;
2006 if (parseFunction != NULL) {
729e4ab9 2007 return parseFunction(state, tag, startline, comment, status);
73c04bcf
A
2008 }
2009 else {
b75a7d8f 2010 *status = U_INTERNAL_PROGRAM_ERROR;
73c04bcf 2011 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
b75a7d8f
A
2012 }
2013
2014 return NULL;
2015}
2016
73c04bcf 2017/* parse the top-level resource */
b75a7d8f 2018struct SRBRoot *
73c04bcf 2019parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
b75a7d8f
A
2020{
2021 struct UString *tokenValue;
374ca955 2022 struct UString comment;
b75a7d8f 2023 uint32_t line;
b75a7d8f
A
2024 enum EResourceType bundleType;
2025 enum ETokenType token;
729e4ab9
A
2026 ParseState state;
2027 uint32_t i;
2028 int encLength;
2029 char* enc;
2030 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2031 {
2032 ustr_init(&state.lookahead[i].value);
2033 ustr_init(&state.lookahead[i].comment);
2034 }
b75a7d8f 2035
729e4ab9 2036 initLookahead(&state, buf, status);
b75a7d8f 2037
729e4ab9
A
2038 state.inputdir = inputDir;
2039 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2040 state.outputdir = outputDir;
2041 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
374ca955
A
2042
2043 ustr_init(&comment);
729e4ab9 2044 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
b75a7d8f 2045
729e4ab9 2046 state.bundle = bundle_open(&comment, FALSE, status);
b75a7d8f 2047
729e4ab9 2048 if (state.bundle == NULL || U_FAILURE(*status))
b75a7d8f
A
2049 {
2050 return NULL;
2051 }
2052
374ca955 2053
729e4ab9
A
2054 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2055
b75a7d8f 2056 /* The following code is to make Empty bundle work no matter with :table specifer or not */
729e4ab9 2057 token = getToken(&state, NULL, NULL, &line, status);
73c04bcf 2058 if(token==TOK_COLON) {
b75a7d8f 2059 *status=U_ZERO_ERROR;
729e4ab9 2060 bundleType=parseResourceType(&state, status);
b75a7d8f 2061
73c04bcf 2062 if(isTable(bundleType))
b75a7d8f 2063 {
729e4ab9 2064 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
b75a7d8f
A
2065 }
2066 else
2067 {
2068 *status=U_PARSE_ERROR;
729e4ab9
A
2069 /* printf("asdsdweqdasdad\n"); */
2070
b75a7d8f
A
2071 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2072 }
2073 }
2074 else
2075 {
73c04bcf 2076 /* not a colon */
b75a7d8f
A
2077 if(token==TOK_OPEN_BRACE)
2078 {
2079 *status=U_ZERO_ERROR;
73c04bcf 2080 bundleType=RT_TABLE;
b75a7d8f
A
2081 }
2082 else
2083 {
73c04bcf
A
2084 /* neither colon nor open brace */
2085 *status=U_PARSE_ERROR;
2086 bundleType=RT_UNKNOWN;
b75a7d8f
A
2087 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2088 }
2089 }
b75a7d8f
A
2090
2091 if (U_FAILURE(*status))
2092 {
729e4ab9 2093 bundle_close(state.bundle, status);
b75a7d8f
A
2094 return NULL;
2095 }
2096
73c04bcf
A
2097 if(bundleType==RT_TABLE_NO_FALLBACK) {
2098 /*
2099 * Parse a top-level table with the table(nofallback) declaration.
2100 * This is the same as a regular table, but also sets the
2101 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2102 */
729e4ab9 2103 state.bundle->noFallback=TRUE;
73c04bcf
A
2104 }
2105 /* top-level tables need not handle special table names like "collations" */
729e4ab9
A
2106 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2107
73c04bcf 2108 if(dependencyArray!=NULL){
729e4ab9 2109 table_add(state.bundle->fRoot, dependencyArray, 0, status);
73c04bcf
A
2110 dependencyArray = NULL;
2111 }
b75a7d8f
A
2112 if (U_FAILURE(*status))
2113 {
729e4ab9 2114 bundle_close(state.bundle, status);
46f4442e 2115 res_close(dependencyArray);
b75a7d8f
A
2116 return NULL;
2117 }
2118
729e4ab9 2119 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
b75a7d8f
A
2120 {
2121 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2122 if(isStrict()){
2123 *status = U_INVALID_FORMAT_ERROR;
2124 return NULL;
2125 }
2126 }
2127
729e4ab9 2128 cleanupLookahead(&state);
46f4442e 2129 ustr_deinit(&comment);
729e4ab9 2130 return state.bundle;
b75a7d8f 2131}