]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/parse.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
4388f060 4* Copyright (C) 1998-2012, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
4388f060 9* File parse.cpp
b75a7d8f
A
10*
11* Modification History:
12*
13* Date Name Description
14* 05/26/99 stephen Creation.
15* 02/25/00 weiv Overhaul to write udata
16* 5/10/01 Ram removed ustdio dependency
17* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
21#include "ucol_imp.h"
22#include "parse.h"
23#include "errmsg.h"
24#include "uhash.h"
25#include "cmemory.h"
26#include "cstring.h"
374ca955 27#include "uinvchar.h"
b75a7d8f
A
28#include "read.h"
29#include "ustr.h"
30#include "reslist.h"
73c04bcf 31#include "rbt_pars.h"
729e4ab9 32#include "genrb.h"
b75a7d8f 33#include "unicode/ustring.h"
729e4ab9 34#include "unicode/uscript.h"
b75a7d8f 35#include "unicode/putil.h"
73c04bcf 36#include <stdio.h>
b75a7d8f
A
37
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Line-break and blank characters */
#define CR              0x000D
#define LF              0x000A
#define SPACE           0x0020
#define TAB             0x0009

/* Characters with a special meaning while reading rule files */
#define ESCAPE          0x005C
#define HASH            0x0023
#define QUOTE           0x0027
#define ZERO            0x0030

/* '[' and ']' under the two names used in this file */
#define STARTCOMMAND    0x005B
#define ENDCOMMAND      0x005D
#define OPENSQBRACKET   0x005B
#define CLOSESQBRACKET  0x005D
b75a7d8f 53
b75a7d8f
A
54struct Lookahead
55{
56 enum ETokenType type;
374ca955
A
57 struct UString value;
58 struct UString comment;
59 uint32_t line;
b75a7d8f
A
60};
61
62/* keep in sync with token defines in read.h */
374ca955 63const char *tokenNames[TOK_TOKEN_COUNT] =
b75a7d8f
A
64{
65 "string", /* A string token, such as "MonthNames" */
66 "'{'", /* An opening brace character */
67 "'}'", /* A closing brace character */
68 "','", /* A comma */
69 "':'", /* A colon */
70
71 "<end of file>", /* End of the file has been reached successfully */
374ca955 72 "<end of line>"
b75a7d8f
A
73};
74
75/* Just to store "TRUE" */
51004dcb 76//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
b75a7d8f 77
729e4ab9
A
78typedef struct {
79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
80 uint32_t lookaheadPosition;
81 UCHARBUF *buffer;
82 struct SRBRoot *bundle;
83 const char *inputdir;
84 uint32_t inputdirLength;
85 const char *outputdir;
86 uint32_t outputdirLength;
4388f060 87 UBool makeBinaryCollation;
729e4ab9 88} ParseState;
b75a7d8f 89
46f4442e 90static UBool gOmitCollationRules = FALSE;
b75a7d8f 91
729e4ab9
A
92typedef struct SResource *
93ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
94
95static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
b75a7d8f 96
b75a7d8f
A
97/* The nature of the lookahead buffer:
98 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
99 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
100 When getToken is called, the current pointer is moved to the next slot and the
101 old slot is filled with the next token from the reader by calling getNextToken.
102 The token values are stored in the slot, which means that token values don't
103 survive a call to getToken, ie.
104
105 UString *value;
106
107 getToken(&value, NULL, status);
108 getToken(NULL, NULL, status); bad - value is now a different string
109*/
110static void
729e4ab9 111initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
b75a7d8f
A
112{
113 static uint32_t initTypeStrings = 0;
114 uint32_t i;
115
116 if (!initTypeStrings)
117 {
118 initTypeStrings = 1;
119 }
120
729e4ab9
A
121 state->lookaheadPosition = 0;
122 state->buffer = buf;
b75a7d8f
A
123
124 resetLineNumber();
125
126 for (i = 0; i < MAX_LOOKAHEAD; i++)
127 {
729e4ab9 128 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
129 if (U_FAILURE(*status))
130 {
131 return;
132 }
133 }
134
135 *status = U_ZERO_ERROR;
136}
137
46f4442e 138static void
729e4ab9 139cleanupLookahead(ParseState* state)
46f4442e
A
140{
141 uint32_t i;
51004dcb 142 for (i = 0; i <= MAX_LOOKAHEAD; i++)
46f4442e 143 {
729e4ab9
A
144 ustr_deinit(&state->lookahead[i].value);
145 ustr_deinit(&state->lookahead[i].comment);
46f4442e
A
146 }
147
148}
149
b75a7d8f 150static enum ETokenType
729e4ab9 151getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
152{
153 enum ETokenType result;
154 uint32_t i;
155
729e4ab9 156 result = state->lookahead[state->lookaheadPosition].type;
b75a7d8f
A
157
158 if (tokenValue != NULL)
159 {
729e4ab9 160 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
b75a7d8f
A
161 }
162
163 if (linenumber != NULL)
164 {
729e4ab9 165 *linenumber = state->lookahead[state->lookaheadPosition].line;
b75a7d8f
A
166 }
167
374ca955
A
168 if (comment != NULL)
169 {
729e4ab9 170 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
171 }
172
729e4ab9
A
173 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
174 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
175 ustr_setlen(&state->lookahead[i].comment, 0, status);
176 ustr_setlen(&state->lookahead[i].value, 0, status);
177 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
178
179 /* printf("getToken, returning %s\n", tokenNames[result]); */
180
181 return result;
182}
183
184static enum ETokenType
729e4ab9 185peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
b75a7d8f 186{
729e4ab9 187 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
b75a7d8f
A
188
189 if (U_FAILURE(*status))
190 {
191 return TOK_ERROR;
192 }
193
194 if (lookaheadCount >= MAX_LOOKAHEAD)
195 {
196 *status = U_INTERNAL_PROGRAM_ERROR;
197 return TOK_ERROR;
198 }
199
200 if (tokenValue != NULL)
201 {
729e4ab9 202 *tokenValue = &state->lookahead[i].value;
b75a7d8f
A
203 }
204
205 if (linenumber != NULL)
206 {
729e4ab9 207 *linenumber = state->lookahead[i].line;
b75a7d8f
A
208 }
209
374ca955 210 if(comment != NULL){
729e4ab9 211 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
212 }
213
729e4ab9 214 return state->lookahead[i].type;
b75a7d8f
A
215}
216
217static void
729e4ab9 218expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
219{
220 uint32_t line;
374ca955 221
729e4ab9 222 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
b75a7d8f 223
73c04bcf 224 if (linenumber != NULL)
b75a7d8f 225 {
73c04bcf 226 *linenumber = line;
b75a7d8f
A
227 }
228
73c04bcf 229 if (U_FAILURE(*status))
b75a7d8f 230 {
73c04bcf 231 return;
b75a7d8f
A
232 }
233
234 if (token != expectedToken)
235 {
236 *status = U_INVALID_FORMAT_ERROR;
237 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
238 }
46f4442e 239 else
b75a7d8f
A
240 {
241 *status = U_ZERO_ERROR;
242 }
243}
244
729e4ab9 245static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
b75a7d8f
A
246{
247 struct UString *tokenValue;
248 char *result;
249 uint32_t count;
250
729e4ab9 251 expect(state, TOK_STRING, &tokenValue, comment, line, status);
b75a7d8f
A
252
253 if (U_FAILURE(*status))
254 {
255 return NULL;
256 }
257
374ca955
A
258 count = u_strlen(tokenValue->fChars);
259 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
260 *status = U_INVALID_FORMAT_ERROR;
261 error(*line, "invariant characters required for table keys, binary data, etc.");
262 return NULL;
263 }
264
51004dcb 265 result = static_cast<char *>(uprv_malloc(count+1));
b75a7d8f
A
266
267 if (result == NULL)
268 {
269 *status = U_MEMORY_ALLOCATION_ERROR;
270 return NULL;
271 }
272
374ca955 273 u_UCharsToChars(tokenValue->fChars, result, count+1);
b75a7d8f
A
274 return result;
275}
276
b75a7d8f 277static struct SResource *
4388f060 278parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
b75a7d8f
A
279{
280 struct SResource *result = NULL;
281 struct UString *tokenValue;
282 FileStream *file = NULL;
283 char filename[256] = { '\0' };
284 char cs[128] = { '\0' };
285 uint32_t line;
b75a7d8f
A
286 UBool quoted = FALSE;
287 UCHARBUF *ucbuf=NULL;
288 UChar32 c = 0;
289 const char* cp = NULL;
290 UChar *pTarget = NULL;
291 UChar *target = NULL;
292 UChar *targetLimit = NULL;
293 int32_t size = 0;
294
729e4ab9 295 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
b75a7d8f
A
296
297 if(isVerbose()){
374ca955 298 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
299 }
300
301 if (U_FAILURE(*status))
302 {
303 return NULL;
304 }
305 /* make the filename including the directory */
729e4ab9 306 if (state->inputdir != NULL)
b75a7d8f 307 {
729e4ab9 308 uprv_strcat(filename, state->inputdir);
b75a7d8f 309
729e4ab9 310 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
311 {
312 uprv_strcat(filename, U_FILE_SEP_STRING);
313 }
314 }
315
316 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
317
729e4ab9 318 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
319
320 if (U_FAILURE(*status))
321 {
322 return NULL;
323 }
b75a7d8f
A
324 uprv_strcat(filename, cs);
325
46f4442e
A
326 if(gOmitCollationRules) {
327 return res_none();
328 }
b75a7d8f
A
329
330 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
331
332 if (U_FAILURE(*status)) {
333 error(line, "An error occured while opening the input file %s\n", filename);
334 return NULL;
335 }
336
337 /* We allocate more space than actually required
338 * since the actual size needed for storing UChars
339 * is not known in UTF-8 byte stream
340 */
73c04bcf 341 size = ucbuf_size(ucbuf) + 1;
b75a7d8f 342 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
374ca955 343 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
b75a7d8f
A
344 target = pTarget;
345 targetLimit = pTarget+size;
346
347 /* read the rules into the buffer */
348 while (target < targetLimit)
349 {
350 c = ucbuf_getc(ucbuf, status);
351 if(c == QUOTE) {
73c04bcf 352 quoted = (UBool)!quoted;
b75a7d8f
A
353 }
354 /* weiv (06/26/2002): adding the following:
355 * - preserving spaces in commands [...]
356 * - # comments until the end of line
357 */
374ca955 358 if (c == STARTCOMMAND && !quoted)
b75a7d8f 359 {
73c04bcf
A
360 /* preserve commands
361 * closing bracket will be handled by the
362 * append at the end of the loop
363 */
364 while(c != ENDCOMMAND) {
51004dcb 365 U_APPEND_CHAR32_ONLY(c, target);
73c04bcf
A
366 c = ucbuf_getc(ucbuf, status);
367 }
368 }
369 else if (c == HASH && !quoted) {
370 /* skip comments */
371 while(c != CR && c != LF) {
372 c = ucbuf_getc(ucbuf, status);
373 }
374 continue;
375 }
376 else if (c == ESCAPE)
b75a7d8f
A
377 {
378 c = unescape(ucbuf, status);
379
4388f060 380 if (c == (UChar32)U_ERR)
b75a7d8f
A
381 {
382 uprv_free(pTarget);
383 T_FileStream_close(file);
384 return NULL;
385 }
386 }
374ca955 387 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
b75a7d8f 388 {
73c04bcf
A
389 /* ignore spaces carriage returns
390 * and line feed unless in the form \uXXXX
b75a7d8f
A
391 */
392 continue;
393 }
394
395 /* Append UChar * after dissembling if c > 0xffff*/
4388f060 396 if (c != (UChar32)U_EOF)
b75a7d8f 397 {
51004dcb 398 U_APPEND_CHAR32_ONLY(c, target);
b75a7d8f
A
399 }
400 else
401 {
402 break;
403 }
404 }
405
374ca955
A
406 /* terminate the string */
407 if(target < targetLimit){
408 *target = 0x0000;
409 }
410
729e4ab9 411 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
374ca955 412
b75a7d8f
A
413
414 ucbuf_close(ucbuf);
415 uprv_free(pTarget);
416 T_FileStream_close(file);
417
418 return result;
419}
420
73c04bcf 421static struct SResource *
4388f060 422parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
73c04bcf
A
423{
424 struct SResource *result = NULL;
425 struct UString *tokenValue;
426 FileStream *file = NULL;
427 char filename[256] = { '\0' };
428 char cs[128] = { '\0' };
429 uint32_t line;
430 UCHARBUF *ucbuf=NULL;
431 const char* cp = NULL;
432 UChar *pTarget = NULL;
433 const UChar *pSource = NULL;
434 int32_t size = 0;
435
729e4ab9 436 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
437
438 if(isVerbose()){
439 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
440 }
441
442 if (U_FAILURE(*status))
443 {
444 return NULL;
445 }
446 /* make the filename including the directory */
729e4ab9 447 if (state->inputdir != NULL)
73c04bcf 448 {
729e4ab9 449 uprv_strcat(filename, state->inputdir);
73c04bcf 450
729e4ab9 451 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
452 {
453 uprv_strcat(filename, U_FILE_SEP_STRING);
454 }
455 }
456
457 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
458
729e4ab9 459 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
460
461 if (U_FAILURE(*status))
462 {
463 return NULL;
464 }
465 uprv_strcat(filename, cs);
466
467
468 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
469
470 if (U_FAILURE(*status)) {
471 error(line, "An error occured while opening the input file %s\n", filename);
472 return NULL;
473 }
474
475 /* We allocate more space than actually required
476 * since the actual size needed for storing UChars
477 * is not known in UTF-8 byte stream
478 */
479 pSource = ucbuf_getBuffer(ucbuf, &size, status);
480 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
481 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
482
483#if !UCONFIG_NO_TRANSLITERATION
484 size = utrans_stripRules(pSource, size, pTarget, status);
485#else
46f4442e 486 size = 0;
73c04bcf
A
487 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
488#endif
729e4ab9 489 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
73c04bcf
A
490
491 ucbuf_close(ucbuf);
492 uprv_free(pTarget);
493 T_FileStream_close(file);
494
495 return result;
496}
497static struct SResource* dependencyArray = NULL;
498
499static struct SResource *
729e4ab9 500parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
73c04bcf
A
501{
502 struct SResource *result = NULL;
503 struct SResource *elem = NULL;
504 struct UString *tokenValue;
505 uint32_t line;
506 char filename[256] = { '\0' };
507 char cs[128] = { '\0' };
508
729e4ab9 509 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
510
511 if(isVerbose()){
512 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
513 }
514
515 if (U_FAILURE(*status))
516 {
517 return NULL;
518 }
519 /* make the filename including the directory */
729e4ab9 520 if (state->outputdir != NULL)
73c04bcf 521 {
729e4ab9 522 uprv_strcat(filename, state->outputdir);
73c04bcf 523
729e4ab9 524 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
525 {
526 uprv_strcat(filename, U_FILE_SEP_STRING);
527 }
528 }
529
530 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
531
532 if (U_FAILURE(*status))
533 {
534 return NULL;
535 }
536 uprv_strcat(filename, cs);
537 if(!T_FileStream_file_exists(filename)){
538 if(isStrict()){
539 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
540 }else{
541 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
542 }
543 }
544 if(dependencyArray==NULL){
729e4ab9 545 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
73c04bcf
A
546 }
547 if(tag!=NULL){
729e4ab9 548 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf 549 }
729e4ab9 550 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf
A
551
552 array_add(dependencyArray, elem, status);
553
554 if (U_FAILURE(*status))
555 {
556 return NULL;
557 }
729e4ab9 558 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
559 return result;
560}
b75a7d8f 561static struct SResource *
729e4ab9 562parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
563{
564 struct UString *tokenValue;
565 struct SResource *result = NULL;
566
73c04bcf 567/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
b75a7d8f
A
568 {
569 return parseUCARules(tag, startline, status);
73c04bcf 570 }*/
b75a7d8f 571 if(isVerbose()){
374ca955 572 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 573 }
729e4ab9 574 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
575
576 if (U_SUCCESS(*status))
577 {
578 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
579 doesn't survive expect either) */
580
729e4ab9 581 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 582 if(U_SUCCESS(*status) && result) {
729e4ab9 583 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f 584
46f4442e
A
585 if (U_FAILURE(*status))
586 {
587 res_close(result);
588 return NULL;
589 }
b75a7d8f
A
590 }
591 }
592
593 return result;
594}
595
596static struct SResource *
729e4ab9 597parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
598{
599 struct UString *tokenValue;
374ca955 600 struct SResource *result = NULL;
b75a7d8f 601
729e4ab9 602 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
603
604 if(isVerbose()){
374ca955 605 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
606 }
607
608 if (U_SUCCESS(*status))
609 {
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
612
729e4ab9 613 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 614
729e4ab9 615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
616
617 if (U_FAILURE(*status))
618 {
46f4442e 619 res_close(result);
b75a7d8f
A
620 return NULL;
621 }
622 }
623
624 return result;
625}
626
729e4ab9
A
/* Context handed to importFromDataFile through its void* parameter. */
typedef struct {
    const char *inputDir;   /* directory searched for the imported locale file; may be NULL */
    const char *outputDir;  /* passed on unchanged to parse() */
} GenrbData;
631
632static struct SResource* resLookup(struct SResource* res, const char* key){
633 struct SResource *current = NULL;
634 struct SResTable *list;
635 if (res == res_none()) {
636 return NULL;
637 }
638
639 list = &(res->u.fTable);
640
641 current = list->fFirst;
642 while (current != NULL) {
643 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
644 return current;
645 }
646 current = current->fNext;
647 }
648 return NULL;
649}
650
651static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
652 struct SRBRoot *data = NULL;
653 UCHARBUF *ucbuf = NULL;
654 GenrbData* genrbdata = (GenrbData*) context;
655 int localeLength = strlen(locale);
656 char* filename = (char*)uprv_malloc(localeLength+5);
657 char *inputDirBuf = NULL;
658 char *openFileName = NULL;
659 const char* cp = "";
660 UChar* urules = NULL;
661 int32_t urulesLength = 0;
662 int32_t i = 0;
663 int32_t dirlen = 0;
664 int32_t filelen = 0;
665 struct SResource* root;
666 struct SResource* collations;
667 struct SResource* collation;
668 struct SResource* sequence;
669
670 memcpy(filename, locale, localeLength);
671 for(i = 0; i < localeLength; i++){
672 if(filename[i] == '-'){
673 filename[i] = '_';
674 }
675 }
676 filename[localeLength] = '.';
677 filename[localeLength+1] = 't';
678 filename[localeLength+2] = 'x';
679 filename[localeLength+3] = 't';
680 filename[localeLength+4] = 0;
681
682
683 if (status==NULL || U_FAILURE(*status)) {
684 return NULL;
685 }
686 if(filename==NULL){
687 *status=U_ILLEGAL_ARGUMENT_ERROR;
688 return NULL;
689 }else{
690 filelen = (int32_t)uprv_strlen(filename);
691 }
692 if(genrbdata->inputDir == NULL) {
693 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
694 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
695 openFileName[0] = '\0';
696 if (filenameBegin != NULL) {
697 /*
698 * When a filename ../../../data/root.txt is specified,
699 * we presume that the input directory is ../../../data
700 * This is very important when the resource file includes
701 * another file, like UCARules.txt or thaidict.brk.
702 */
703 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
704 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
705
706 /* test for NULL */
707 if(inputDirBuf == NULL) {
708 *status = U_MEMORY_ALLOCATION_ERROR;
709 goto finish;
710 }
711
712 inputDirBuf[filenameSize - 1] = 0;
713 genrbdata->inputDir = inputDirBuf;
714 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
715 }
716 }else{
717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
718
719 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
720 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
721
722 /* test for NULL */
723 if(openFileName == NULL) {
724 *status = U_MEMORY_ALLOCATION_ERROR;
725 goto finish;
726 }
727
728 openFileName[0] = '\0';
729 /*
730 * append the input dir to openFileName if the first char in
731 * filename is not file seperation char and the last char input directory is not '.'.
732 * This is to support :
733 * genrb -s. /home/icu/data
734 * genrb -s. icu/data
735 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant
737 * user should use
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir
739 */
740 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
741 uprv_strcpy(openFileName, genrbdata->inputDir);
742 openFileName[dirlen] = U_FILE_SEP_CHAR;
743 }
744 openFileName[dirlen + 1] = '\0';
745 } else {
746 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
747
748 /* test for NULL */
749 if(openFileName == NULL) {
750 *status = U_MEMORY_ALLOCATION_ERROR;
751 goto finish;
752 }
753
754 uprv_strcpy(openFileName, genrbdata->inputDir);
755
756 }
757 }
758 uprv_strcat(openFileName, filename);
759 /* printf("%s\n", openFileName); */
760 *status = U_ZERO_ERROR;
761 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
762
763 if(*status == U_FILE_ACCESS_ERROR) {
764
765 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
766 goto finish;
767 }
768 if (ucbuf == NULL || U_FAILURE(*status)) {
769 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
770 goto finish;
771 }
772
773 /* Parse the data into an SRBRoot */
4388f060 774 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status);
729e4ab9
A
775
776 root = data->fRoot;
777 collations = resLookup(root, "collations");
4388f060
A
778 if (collations != NULL) {
779 collation = resLookup(collations, type);
780 if (collation != NULL) {
781 sequence = resLookup(collation, "Sequence");
782 if (sequence != NULL) {
783 urules = sequence->u.fString.fChars;
784 urulesLength = sequence->u.fString.fLength;
785 *pLength = urulesLength;
786 }
787 }
788 }
729e4ab9
A
789
790finish:
729e4ab9
A
791 if (inputDirBuf != NULL) {
792 uprv_free(inputDirBuf);
793 }
794
795 if (openFileName != NULL) {
796 uprv_free(openFileName);
797 }
798
799 if(ucbuf) {
800 ucbuf_close(ucbuf);
801 }
802
803 return urules;
804}
805
4388f060
A
806// Quick-and-dirty escaping function.
807// Assumes that we are on an ASCII-based platform.
808static void
809escape(const UChar *s, char *buffer) {
810 int32_t length = u_strlen(s);
811 int32_t i = 0;
812 for (;;) {
813 UChar32 c;
814 U16_NEXT(s, i, length, c);
815 if (c == 0) {
816 *buffer = 0;
817 return;
818 } else if (0x20 <= c && c <= 0x7e) {
819 // printable ASCII
820 *buffer++ = (char)c; // assumes ASCII-based platform
821 } else {
822 buffer += sprintf(buffer, "\\u%04X", (int)c);
823 }
824 }
825}
826
b75a7d8f 827static struct SResource *
729e4ab9 828addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
b75a7d8f 829{
b75a7d8f
A
830 struct SResource *member = NULL;
831 struct UString *tokenValue;
374ca955 832 struct UString comment;
b75a7d8f
A
833 enum ETokenType token;
834 char subtag[1024];
835 UVersionInfo version;
b75a7d8f 836 uint32_t line;
729e4ab9 837 GenrbData genrbdata;
b75a7d8f 838 /* '{' . (name resource)* '}' */
374ca955
A
839 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
840
b75a7d8f
A
841 for (;;)
842 {
374ca955 843 ustr_init(&comment);
729e4ab9 844 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
845
846 if (token == TOK_CLOSE_BRACE)
847 {
848 return result;
849 }
850
851 if (token != TOK_STRING)
852 {
46f4442e 853 res_close(result);
b75a7d8f
A
854 *status = U_INVALID_FORMAT_ERROR;
855
856 if (token == TOK_EOF)
857 {
858 error(startline, "unterminated table");
859 }
860 else
861 {
862 error(line, "Unexpected token %s", tokenNames[token]);
863 }
864
865 return NULL;
866 }
867
868 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
869
870 if (U_FAILURE(*status))
871 {
46f4442e 872 res_close(result);
b75a7d8f
A
873 return NULL;
874 }
875
729e4ab9 876 member = parseResource(state, subtag, NULL, status);
b75a7d8f
A
877
878 if (U_FAILURE(*status))
879 {
46f4442e 880 res_close(result);
b75a7d8f
A
881 return NULL;
882 }
883
884 if (uprv_strcmp(subtag, "Version") == 0)
885 {
886 char ver[40];
887 int32_t length = member->u.fString.fLength;
888
889 if (length >= (int32_t) sizeof(ver))
890 {
891 length = (int32_t) sizeof(ver) - 1;
892 }
893
894 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
895 u_versionFromString(version, ver);
896
897 table_add(result, member, line, status);
898
899 }
900 else if (uprv_strcmp(subtag, "Override") == 0)
901 {
4388f060 902 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
b75a7d8f
A
903 table_add(result, member, line, status);
904
905 }
906 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
907 {
908 /* discard duplicate %%CollationBin if any*/
909 }
910 else if (uprv_strcmp(subtag, "Sequence") == 0)
911 {
729e4ab9
A
912#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
913 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
b75a7d8f 914#else
4388f060 915 if(state->makeBinaryCollation) {
b75a7d8f
A
916
917 /* do the collation elements */
918 int32_t len = 0;
919 uint8_t *data = NULL;
920 UCollator *coll = NULL;
729e4ab9 921 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
4388f060 922 int32_t reorderCodeCount;
729e4ab9 923 int32_t reorderCodeIndex;
b75a7d8f 924 UParseError parseError;
b75a7d8f 925
729e4ab9
A
926 genrbdata.inputDir = state->inputdir;
927 genrbdata.outputDir = state->outputdir;
928
4388f060
A
929 UErrorCode intStatus = U_ZERO_ERROR;
930 uprv_memset(&parseError, 0, sizeof(parseError));
729e4ab9
A
931 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
932 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
b75a7d8f
A
933
934 if (U_SUCCESS(intStatus) && coll != NULL)
935 {
374ca955
A
936 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
937 data = (uint8_t *)uprv_malloc(len);
73c04bcf 938 intStatus = U_ZERO_ERROR;
374ca955
A
939 len = ucol_cloneBinary(coll, data, len, &intStatus);
940 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
b75a7d8f
A
941
942 /* tailoring rules version */
943 /* This is wrong! */
944 /*coll->dataInfo.dataVersion[1] = version[0];*/
945 /* Copy tailoring version. Builder version already */
946 /* set in ucol_openRules */
947 ((UCATableHeader *)data)->version[1] = version[0];
948 ((UCATableHeader *)data)->version[2] = version[1];
949 ((UCATableHeader *)data)->version[3] = version[2];
950
951 if (U_SUCCESS(intStatus) && data != NULL)
952 {
729e4ab9
A
953 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
954 table_add(result, collationBin, line, status);
b75a7d8f 955 uprv_free(data);
4388f060 956
729e4ab9
A
957 reorderCodeCount = ucol_getReorderCodes(
958 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
959 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
960 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
961 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
962 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
963 }
964 table_add(result, reorderCodeRes, line, status);
965 }
b75a7d8f
A
966 }
967 else
968 {
969 warning(line, "could not obtain rules from collator");
970 if(isStrict()){
971 *status = U_INVALID_FORMAT_ERROR;
972 return NULL;
973 }
974 }
975
976 ucol_close(coll);
977 }
978 else
979 {
729e4ab9 980 if(intStatus == U_FILE_ACCESS_ERROR) {
4388f060
A
981 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
982 *status = intStatus;
983 return NULL;
729e4ab9 984 }
4388f060
A
985 char preBuffer[100], postBuffer[100];
986 escape(parseError.preContext, preBuffer);
987 escape(parseError.postContext, postBuffer);
988 warning(line,
989 "%%%%CollationBin could not be constructed from CollationElements\n"
990 " check context, check that the FractionalUCA.txt UCA version "
991 "matches the current UCD version\n"
992 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
993 u_errorName(intStatus),
994 parseError.line,
995 parseError.offset,
996 preBuffer,
997 postBuffer);
b75a7d8f 998 if(isStrict()){
374ca955 999 *status = intStatus;
b75a7d8f
A
1000 return NULL;
1001 }
1002 }
1003 } else {
1004 if(isVerbose()) {
1005 printf("Not building Collation binary\n");
1006 }
1007 }
1008#endif
729e4ab9
A
1009 /* in order to achieve smaller data files, we can direct genrb */
1010 /* to omit collation rules */
1011 if(gOmitCollationRules) {
1012 bundle_closeString(state->bundle, member);
1013 } else {
1014 table_add(result, member, line, status);
1015 }
b75a7d8f 1016 }
b75a7d8f
A
1017 if (U_FAILURE(*status))
1018 {
46f4442e 1019 res_close(result);
b75a7d8f
A
1020 return NULL;
1021 }
1022 }
1023
4388f060
A
1024 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1025 *status = U_INTERNAL_PROGRAM_ERROR;
1026 return NULL;
b75a7d8f
A
1027}
1028
374ca955 1029static struct SResource *
729e4ab9 1030parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
374ca955
A
1031{
1032 struct SResource *result = NULL;
1033 struct SResource *member = NULL;
1034 struct SResource *collationRes = NULL;
1035 struct UString *tokenValue;
1036 struct UString comment;
1037 enum ETokenType token;
1038 char subtag[1024], typeKeyword[1024];
1039 uint32_t line;
1040
729e4ab9 1041 result = table_open(state->bundle, tag, NULL, status);
374ca955
A
1042
1043 if (result == NULL || U_FAILURE(*status))
1044 {
1045 return NULL;
1046 }
1047 if(isVerbose()){
1048 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1049 }
1050 if(!newCollation) {
729e4ab9 1051 return addCollation(state, result, startline, status);
73c04bcf
A
1052 }
1053 else {
1054 for(;;) {
1055 ustr_init(&comment);
729e4ab9 1056 token = getToken(state, &tokenValue, &comment, &line, status);
374ca955 1057
73c04bcf 1058 if (token == TOK_CLOSE_BRACE)
374ca955 1059 {
73c04bcf 1060 return result;
374ca955 1061 }
73c04bcf
A
1062
1063 if (token != TOK_STRING)
374ca955 1064 {
46f4442e 1065 res_close(result);
73c04bcf
A
1066 *status = U_INVALID_FORMAT_ERROR;
1067
1068 if (token == TOK_EOF)
1069 {
1070 error(startline, "unterminated table");
1071 }
1072 else
1073 {
1074 error(line, "Unexpected token %s", tokenNames[token]);
1075 }
1076
1077 return NULL;
374ca955
A
1078 }
1079
73c04bcf 1080 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
374ca955 1081
73c04bcf
A
1082 if (U_FAILURE(*status))
1083 {
46f4442e 1084 res_close(result);
73c04bcf
A
1085 return NULL;
1086 }
374ca955 1087
73c04bcf
A
1088 if (uprv_strcmp(subtag, "default") == 0)
1089 {
729e4ab9 1090 member = parseResource(state, subtag, NULL, status);
374ca955 1091
73c04bcf
A
1092 if (U_FAILURE(*status))
1093 {
46f4442e 1094 res_close(result);
73c04bcf
A
1095 return NULL;
1096 }
374ca955 1097
73c04bcf
A
1098 table_add(result, member, line, status);
1099 }
1100 else
1101 {
729e4ab9 1102 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
73c04bcf
A
1103 /* this probably needs to be refactored or recursively use the parser */
1104 /* first we assume that our collation table won't have the explicit type */
1105 /* then, we cannot handle aliases */
1106 if(token == TOK_OPEN_BRACE) {
729e4ab9
A
1107 token = getToken(state, &tokenValue, &comment, &line, status);
1108 collationRes = table_open(state->bundle, subtag, NULL, status);
1109 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1110 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1111 table_add(result, collationRes, startline, status);
1112 }
73c04bcf
A
1113 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1114 /* we could have a table too */
729e4ab9 1115 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
73c04bcf
A
1116 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1117 if(uprv_strcmp(typeKeyword, "alias") == 0) {
729e4ab9 1118 member = parseResource(state, subtag, NULL, status);
73c04bcf
A
1119 if (U_FAILURE(*status))
1120 {
46f4442e 1121 res_close(result);
73c04bcf
A
1122 return NULL;
1123 }
374ca955 1124
73c04bcf
A
1125 table_add(result, member, line, status);
1126 } else {
46f4442e 1127 res_close(result);
73c04bcf
A
1128 *status = U_INVALID_FORMAT_ERROR;
1129 return NULL;
1130 }
1131 } else {
46f4442e 1132 res_close(result);
73c04bcf
A
1133 *status = U_INVALID_FORMAT_ERROR;
1134 return NULL;
1135 }
374ca955 1136 }
374ca955 1137
73c04bcf 1138 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
374ca955 1139
73c04bcf 1140 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
374ca955 1141
73c04bcf
A
1142 if (U_FAILURE(*status))
1143 {
46f4442e 1144 res_close(result);
73c04bcf
A
1145 return NULL;
1146 }
374ca955 1147 }
374ca955
A
1148 }
1149}
1150
b75a7d8f
A
1151/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1152 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1153static struct SResource *
729e4ab9 1154realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
b75a7d8f
A
1155{
1156 struct SResource *member = NULL;
1157 struct UString *tokenValue=NULL;
374ca955 1158 struct UString comment;
b75a7d8f
A
1159 enum ETokenType token;
1160 char subtag[1024];
1161 uint32_t line;
1162 UBool readToken = FALSE;
1163
1164 /* '{' . (name resource)* '}' */
4388f060 1165
b75a7d8f 1166 if(isVerbose()){
374ca955 1167 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1168 }
1169 for (;;)
1170 {
374ca955 1171 ustr_init(&comment);
729e4ab9 1172 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
1173
1174 if (token == TOK_CLOSE_BRACE)
1175 {
1176 if (!readToken) {
1177 warning(startline, "Encountered empty table");
1178 }
1179 return table;
1180 }
1181
1182 if (token != TOK_STRING)
1183 {
b75a7d8f
A
1184 *status = U_INVALID_FORMAT_ERROR;
1185
1186 if (token == TOK_EOF)
1187 {
1188 error(startline, "unterminated table");
1189 }
1190 else
1191 {
374ca955 1192 error(line, "unexpected token %s", tokenNames[token]);
b75a7d8f
A
1193 }
1194
1195 return NULL;
1196 }
1197
374ca955
A
1198 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1199 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1200 } else {
1201 *status = U_INVALID_FORMAT_ERROR;
1202 error(line, "invariant characters required for table keys");
374ca955
A
1203 return NULL;
1204 }
b75a7d8f
A
1205
1206 if (U_FAILURE(*status))
1207 {
729e4ab9 1208 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
b75a7d8f
A
1209 return NULL;
1210 }
1211
729e4ab9 1212 member = parseResource(state, subtag, &comment, status);
b75a7d8f
A
1213
1214 if (member == NULL || U_FAILURE(*status))
1215 {
729e4ab9 1216 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
b75a7d8f
A
1217 return NULL;
1218 }
1219
1220 table_add(table, member, line, status);
1221
1222 if (U_FAILURE(*status))
1223 {
729e4ab9 1224 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
b75a7d8f
A
1225 return NULL;
1226 }
1227 readToken = TRUE;
46f4442e 1228 ustr_deinit(&comment);
4388f060 1229 }
b75a7d8f
A
1230
1231 /* not reached */
1232 /* A compiler warning will appear if all paths don't contain a return statement. */
1233/* *status = U_INTERNAL_PROGRAM_ERROR;
1234 return NULL;*/
1235}
1236
1237static struct SResource *
729e4ab9 1238parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1239{
1240 struct SResource *result;
1241
1242 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1243 {
729e4ab9 1244 return parseCollationElements(state, tag, startline, FALSE, status);
374ca955
A
1245 }
1246 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1247 {
729e4ab9 1248 return parseCollationElements(state, tag, startline, TRUE, status);
b75a7d8f
A
1249 }
1250 if(isVerbose()){
374ca955 1251 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1252 }
374ca955 1253
729e4ab9 1254 result = table_open(state->bundle, tag, comment, status);
b75a7d8f
A
1255
1256 if (result == NULL || U_FAILURE(*status))
1257 {
1258 return NULL;
1259 }
729e4ab9 1260 return realParseTable(state, result, tag, startline, status);
b75a7d8f
A
1261}
1262
1263static struct SResource *
729e4ab9 1264parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1265{
1266 struct SResource *result = NULL;
1267 struct SResource *member = NULL;
1268 struct UString *tokenValue;
374ca955 1269 struct UString memberComments;
b75a7d8f
A
1270 enum ETokenType token;
1271 UBool readToken = FALSE;
1272
729e4ab9 1273 result = array_open(state->bundle, tag, comment, status);
b75a7d8f
A
1274
1275 if (result == NULL || U_FAILURE(*status))
1276 {
1277 return NULL;
1278 }
1279 if(isVerbose()){
374ca955 1280 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1281 }
374ca955
A
1282
1283 ustr_init(&memberComments);
1284
b75a7d8f
A
1285 /* '{' . resource [','] '}' */
1286 for (;;)
1287 {
374ca955
A
1288 /* reset length */
1289 ustr_setlen(&memberComments, 0, status);
1290
b75a7d8f 1291 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1292 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
374ca955 1293
b75a7d8f
A
1294
1295 if (token == TOK_CLOSE_BRACE)
1296 {
729e4ab9 1297 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1298 if (!readToken) {
1299 warning(startline, "Encountered empty array");
1300 }
1301 break;
1302 }
1303
1304 if (token == TOK_EOF)
1305 {
46f4442e 1306 res_close(result);
b75a7d8f
A
1307 *status = U_INVALID_FORMAT_ERROR;
1308 error(startline, "unterminated array");
1309 return NULL;
1310 }
1311
1312 /* string arrays are a special case */
1313 if (token == TOK_STRING)
1314 {
729e4ab9
A
1315 getToken(state, &tokenValue, &memberComments, NULL, status);
1316 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
b75a7d8f
A
1317 }
1318 else
1319 {
729e4ab9 1320 member = parseResource(state, NULL, &memberComments, status);
b75a7d8f
A
1321 }
1322
1323 if (member == NULL || U_FAILURE(*status))
1324 {
46f4442e 1325 res_close(result);
b75a7d8f
A
1326 return NULL;
1327 }
1328
1329 array_add(result, member, status);
1330
1331 if (U_FAILURE(*status))
1332 {
46f4442e 1333 res_close(result);
b75a7d8f
A
1334 return NULL;
1335 }
1336
1337 /* eat optional comma if present */
729e4ab9 1338 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1339
1340 if (token == TOK_COMMA)
1341 {
729e4ab9 1342 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1343 }
1344
1345 if (U_FAILURE(*status))
1346 {
46f4442e 1347 res_close(result);
b75a7d8f
A
1348 return NULL;
1349 }
1350 readToken = TRUE;
1351 }
1352
46f4442e 1353 ustr_deinit(&memberComments);
b75a7d8f
A
1354 return result;
1355}
1356
1357static struct SResource *
729e4ab9 1358parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1359{
1360 struct SResource *result = NULL;
1361 enum ETokenType token;
1362 char *string;
1363 int32_t value;
1364 UBool readToken = FALSE;
b75a7d8f
A
1365 char *stopstring;
1366 uint32_t len;
374ca955 1367 struct UString memberComments;
b75a7d8f 1368
729e4ab9 1369 result = intvector_open(state->bundle, tag, comment, status);
b75a7d8f
A
1370
1371 if (result == NULL || U_FAILURE(*status))
1372 {
1373 return NULL;
1374 }
1375
1376 if(isVerbose()){
374ca955 1377 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1378 }
374ca955 1379 ustr_init(&memberComments);
b75a7d8f
A
1380 /* '{' . string [','] '}' */
1381 for (;;)
1382 {
374ca955
A
1383 ustr_setlen(&memberComments, 0, status);
1384
b75a7d8f 1385 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1386 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
b75a7d8f
A
1387
1388 if (token == TOK_CLOSE_BRACE)
1389 {
1390 /* it's the end, consume the close brace */
729e4ab9 1391 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1392 if (!readToken) {
1393 warning(startline, "Encountered empty int vector");
1394 }
46f4442e 1395 ustr_deinit(&memberComments);
b75a7d8f
A
1396 return result;
1397 }
1398
729e4ab9 1399 string = getInvariantString(state, NULL, NULL, status);
b75a7d8f
A
1400
1401 if (U_FAILURE(*status))
1402 {
46f4442e 1403 res_close(result);
b75a7d8f
A
1404 return NULL;
1405 }
b75a7d8f 1406
46f4442e 1407 /* For handling illegal char in the Intvector */
b75a7d8f 1408 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
374ca955 1409 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1410
1411 if(len==uprv_strlen(string))
1412 {
1413 intvector_add(result, value, status);
1414 uprv_free(string);
729e4ab9 1415 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1416 }
1417 else
1418 {
1419 uprv_free(string);
1420 *status=U_INVALID_CHAR_FOUND;
1421 }
b75a7d8f
A
1422
1423 if (U_FAILURE(*status))
1424 {
46f4442e 1425 res_close(result);
b75a7d8f
A
1426 return NULL;
1427 }
1428
1429 /* the comma is optional (even though it is required to prevent the reader from concatenating
1430 consecutive entries) so that a missing comma on the last entry isn't an error */
1431 if (token == TOK_COMMA)
1432 {
729e4ab9 1433 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1434 }
1435 readToken = TRUE;
1436 }
1437
1438 /* not reached */
1439 /* A compiler warning will appear if all paths don't contain a return statement. */
1440/* intvector_close(result, status);
1441 *status = U_INTERNAL_PROGRAM_ERROR;
1442 return NULL;*/
1443}
1444
1445static struct SResource *
729e4ab9 1446parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1447{
1448 struct SResource *result = NULL;
1449 uint8_t *value;
1450 char *string;
1451 char toConv[3] = {'\0', '\0', '\0'};
1452 uint32_t count;
1453 uint32_t i;
1454 uint32_t line;
b75a7d8f
A
1455 char *stopstring;
1456 uint32_t len;
1457
729e4ab9 1458 string = getInvariantString(state, &line, NULL, status);
b75a7d8f
A
1459
1460 if (string == NULL || U_FAILURE(*status))
1461 {
1462 return NULL;
1463 }
1464
729e4ab9 1465 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1466
1467 if (U_FAILURE(*status))
1468 {
1469 uprv_free(string);
1470 return NULL;
1471 }
1472
1473 if(isVerbose()){
374ca955 1474 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1475 }
1476
374ca955 1477 count = (uint32_t)uprv_strlen(string);
b75a7d8f
A
1478 if (count > 0){
1479 if((count % 2)==0){
51004dcb 1480 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
b75a7d8f
A
1481
1482 if (value == NULL)
1483 {
1484 uprv_free(string);
1485 *status = U_MEMORY_ALLOCATION_ERROR;
1486 return NULL;
1487 }
1488
1489 for (i = 0; i < count; i += 2)
1490 {
1491 toConv[0] = string[i];
1492 toConv[1] = string[i + 1];
1493
1494 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
374ca955 1495 len=(uint32_t)(stopstring-toConv);
b75a7d8f
A
1496
1497 if(len!=uprv_strlen(toConv))
1498 {
1499 uprv_free(string);
1500 *status=U_INVALID_CHAR_FOUND;
1501 return NULL;
1502 }
1503 }
1504
729e4ab9 1505 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
b75a7d8f
A
1506
1507 uprv_free(value);
1508 }
1509 else
1510 {
1511 *status = U_INVALID_CHAR_FOUND;
1512 uprv_free(string);
1513 error(line, "Encountered invalid binary string");
1514 return NULL;
1515 }
1516 }
1517 else
1518 {
729e4ab9 1519 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
b75a7d8f
A
1520 warning(startline, "Encountered empty binary tag");
1521 }
1522 uprv_free(string);
1523
1524 return result;
1525}
1526
1527static struct SResource *
729e4ab9 1528parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1529{
1530 struct SResource *result = NULL;
1531 int32_t value;
1532 char *string;
b75a7d8f
A
1533 char *stopstring;
1534 uint32_t len;
1535
729e4ab9 1536 string = getInvariantString(state, NULL, NULL, status);
b75a7d8f
A
1537
1538 if (string == NULL || U_FAILURE(*status))
1539 {
1540 return NULL;
1541 }
1542
729e4ab9 1543 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1544
1545 if (U_FAILURE(*status))
1546 {
1547 uprv_free(string);
1548 return NULL;
1549 }
1550
1551 if(isVerbose()){
374ca955 1552 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1553 }
1554
1555 if (uprv_strlen(string) <= 0)
1556 {
1557 warning(startline, "Encountered empty integer. Default value is 0.");
1558 }
1559
46f4442e
A
1560 /* Allow integer support for hexdecimal, octal digit and decimal*/
1561 /* and handle illegal char in the integer*/
b75a7d8f 1562 value = uprv_strtoul(string, &stopstring, 0);
374ca955 1563 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1564 if(len==uprv_strlen(string))
1565 {
729e4ab9 1566 result = int_open(state->bundle, tag, value, comment, status);
b75a7d8f
A
1567 }
1568 else
1569 {
1570 *status=U_INVALID_CHAR_FOUND;
1571 }
1572 uprv_free(string);
1573
1574 return result;
1575}
1576
1577static struct SResource *
729e4ab9 1578parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1579{
1580 struct SResource *result;
1581 FileStream *file;
1582 int32_t len;
1583 uint8_t *data;
1584 char *filename;
1585 uint32_t line;
1586 char *fullname = NULL;
729e4ab9 1587 filename = getInvariantString(state, &line, NULL, status);
b75a7d8f
A
1588
1589 if (U_FAILURE(*status))
1590 {
1591 return NULL;
1592 }
1593
729e4ab9 1594 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1595
1596 if (U_FAILURE(*status))
1597 {
1598 uprv_free(filename);
1599 return NULL;
1600 }
1601
1602 if(isVerbose()){
374ca955 1603 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1604 }
1605
1606 /* Open the input file for reading */
729e4ab9 1607 if (state->inputdir == NULL)
b75a7d8f 1608 {
46f4442e
A
1609#if 1
1610 /*
1611 * Always save file file name, even if there's
1612 * no input directory specified. MIGHT BREAK SOMETHING
1613 */
1614 int32_t filenameLength = uprv_strlen(filename);
1615
1616 fullname = (char *) uprv_malloc(filenameLength + 1);
1617 uprv_strcpy(fullname, filename);
1618#endif
1619
b75a7d8f
A
1620 file = T_FileStream_open(filename, "rb");
1621 }
1622 else
1623 {
1624
374ca955 1625 int32_t count = (int32_t)uprv_strlen(filename);
b75a7d8f 1626
729e4ab9 1627 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f 1628 {
729e4ab9 1629 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
b75a7d8f
A
1630
1631 /* test for NULL */
1632 if(fullname == NULL)
1633 {
1634 *status = U_MEMORY_ALLOCATION_ERROR;
1635 return NULL;
1636 }
1637
729e4ab9 1638 uprv_strcpy(fullname, state->inputdir);
b75a7d8f 1639
729e4ab9
A
1640 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1641 fullname[state->inputdirLength + 1] = '\0';
b75a7d8f
A
1642
1643 uprv_strcat(fullname, filename);
1644 }
1645 else
1646 {
729e4ab9 1647 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
b75a7d8f
A
1648
1649 /* test for NULL */
1650 if(fullname == NULL)
1651 {
1652 *status = U_MEMORY_ALLOCATION_ERROR;
1653 return NULL;
1654 }
1655
729e4ab9 1656 uprv_strcpy(fullname, state->inputdir);
b75a7d8f
A
1657 uprv_strcat(fullname, filename);
1658 }
1659
1660 file = T_FileStream_open(fullname, "rb");
1661
1662 }
1663
1664 if (file == NULL)
1665 {
1666 error(line, "couldn't open input file %s", filename);
1667 *status = U_FILE_ACCESS_ERROR;
1668 return NULL;
1669 }
1670
1671 len = T_FileStream_size(file);
1672 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1673 /* test for NULL */
1674 if(data == NULL)
1675 {
1676 *status = U_MEMORY_ALLOCATION_ERROR;
1677 T_FileStream_close (file);
1678 return NULL;
1679 }
1680
4388f060 1681 /* int32_t numRead = */ T_FileStream_read (file, data, len);
b75a7d8f
A
1682 T_FileStream_close (file);
1683
729e4ab9 1684 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
b75a7d8f
A
1685
1686 uprv_free(data);
1687 uprv_free(filename);
1688 uprv_free(fullname);
1689
1690 return result;
1691}
1692
1693static struct SResource *
729e4ab9 1694parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1695{
1696 struct SResource *result;
1697 int32_t len=0;
1698 char *filename;
1699 uint32_t line;
1700 UChar *pTarget = NULL;
1701
1702 UCHARBUF *ucbuf;
1703 char *fullname = NULL;
1704 int32_t count = 0;
1705 const char* cp = NULL;
1706 const UChar* uBuffer = NULL;
1707
729e4ab9 1708 filename = getInvariantString(state, &line, NULL, status);
374ca955 1709 count = (int32_t)uprv_strlen(filename);
b75a7d8f
A
1710
1711 if (U_FAILURE(*status))
1712 {
1713 return NULL;
1714 }
1715
729e4ab9 1716 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1717
1718 if (U_FAILURE(*status))
1719 {
1720 uprv_free(filename);
1721 return NULL;
1722 }
1723
1724 if(isVerbose()){
374ca955 1725 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1726 }
1727
729e4ab9 1728 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
b75a7d8f
A
1729 /* test for NULL */
1730 if(fullname == NULL)
1731 {
1732 *status = U_MEMORY_ALLOCATION_ERROR;
1733 uprv_free(filename);
1734 return NULL;
374ca955 1735 }
b75a7d8f 1736
729e4ab9
A
1737 if(state->inputdir!=NULL){
1738 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
1739 {
1740
729e4ab9 1741 uprv_strcpy(fullname, state->inputdir);
b75a7d8f 1742
729e4ab9
A
1743 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1744 fullname[state->inputdirLength + 1] = '\0';
b75a7d8f
A
1745
1746 uprv_strcat(fullname, filename);
1747 }
1748 else
1749 {
729e4ab9 1750 uprv_strcpy(fullname, state->inputdir);
b75a7d8f
A
1751 uprv_strcat(fullname, filename);
1752 }
1753 }else{
1754 uprv_strcpy(fullname,filename);
1755 }
1756
1757 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1758
1759 if (U_FAILURE(*status)) {
1760 error(line, "couldn't open input file %s\n", filename);
1761 return NULL;
1762 }
1763
1764 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
729e4ab9 1765 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
b75a7d8f 1766
51004dcb
A
1767 ucbuf_close(ucbuf);
1768
b75a7d8f
A
1769 uprv_free(pTarget);
1770
1771 uprv_free(filename);
1772 uprv_free(fullname);
1773
1774 return result;
1775}
1776
73c04bcf
A
1777
1778
1779
1780
1781U_STRING_DECL(k_type_string, "string", 6);
1782U_STRING_DECL(k_type_binary, "binary", 6);
1783U_STRING_DECL(k_type_bin, "bin", 3);
1784U_STRING_DECL(k_type_table, "table", 5);
1785U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1786U_STRING_DECL(k_type_int, "int", 3);
1787U_STRING_DECL(k_type_integer, "integer", 7);
1788U_STRING_DECL(k_type_array, "array", 5);
1789U_STRING_DECL(k_type_alias, "alias", 5);
1790U_STRING_DECL(k_type_intvector, "intvector", 9);
1791U_STRING_DECL(k_type_import, "import", 6);
1792U_STRING_DECL(k_type_include, "include", 7);
73c04bcf
A
1793
1794/* Various non-standard processing plugins that create one or more special resources. */
1795U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1796U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1797U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1798U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1799
/*
 * Resource types the parser distinguishes. The values are used as indexes into
 * gResourceTypes, and RT_UNKNOWN / RT_RESERVED bracket the range that
 * parseResourceType() searches, so the order of the enumerators matters.
 */
typedef enum EResourceType
{
    RT_UNKNOWN,                 /* no explicit type given (type is guessed later) */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker of the searchable range */
} EResourceType;
1819
1820static struct {
1821 const char *nameChars; /* only used for debugging */
1822 const UChar *nameUChars;
1823 ParseResourceFunction *parseFunction;
1824} gResourceTypes[] = {
1825 {"Unknown", NULL, NULL},
1826 {"string", k_type_string, parseString},
1827 {"binary", k_type_binary, parseBinary},
1828 {"table", k_type_table, parseTable},
1829 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1830 {"integer", k_type_integer, parseInteger},
1831 {"array", k_type_array, parseArray},
1832 {"alias", k_type_alias, parseAlias},
1833 {"intvector", k_type_intvector, parseIntVector},
1834 {"import", k_type_import, parseImport},
1835 {"include", k_type_include, parseInclude},
1836 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1837 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1838 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1839 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1840 {"reserved", NULL, NULL}
1841};
1842
4388f060 1843void initParser(UBool omitCollationRules)
73c04bcf 1844{
73c04bcf
A
1845 U_STRING_INIT(k_type_string, "string", 6);
1846 U_STRING_INIT(k_type_binary, "binary", 6);
1847 U_STRING_INIT(k_type_bin, "bin", 3);
1848 U_STRING_INIT(k_type_table, "table", 5);
1849 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1850 U_STRING_INIT(k_type_int, "int", 3);
1851 U_STRING_INIT(k_type_integer, "integer", 7);
1852 U_STRING_INIT(k_type_array, "array", 5);
1853 U_STRING_INIT(k_type_alias, "alias", 5);
1854 U_STRING_INIT(k_type_intvector, "intvector", 9);
1855 U_STRING_INIT(k_type_import, "import", 6);
73c04bcf
A
1856 U_STRING_INIT(k_type_include, "include", 7);
1857
1858 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1859 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1860 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1861 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
729e4ab9 1862
46f4442e 1863 gOmitCollationRules = omitCollationRules;
73c04bcf
A
1864}
1865
4388f060 1866static inline UBool isTable(enum EResourceType type) {
73c04bcf
A
1867 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1868}
1869
1870static enum EResourceType
729e4ab9 1871parseResourceType(ParseState* state, UErrorCode *status)
73c04bcf
A
1872{
1873 struct UString *tokenValue;
1874 struct UString comment;
1875 enum EResourceType result = RT_UNKNOWN;
1876 uint32_t line=0;
1877 ustr_init(&comment);
729e4ab9 1878 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
73c04bcf
A
1879
1880 if (U_FAILURE(*status))
1881 {
1882 return RT_UNKNOWN;
1883 }
1884
1885 *status = U_ZERO_ERROR;
1886
1887 /* Search for normal types */
1888 result=RT_UNKNOWN;
4388f060 1889 while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
73c04bcf
A
1890 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1891 break;
1892 }
1893 }
1894 /* Now search for the aliases */
1895 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1896 result = RT_INTEGER;
1897 }
1898 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1899 result = RT_BINARY;
1900 }
1901 else if (result == RT_RESERVED) {
1902 char tokenBuffer[1024];
1903 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1904 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1905 *status = U_INVALID_FORMAT_ERROR;
1906 error(line, "unknown resource type '%s'", tokenBuffer);
1907 }
1908
1909 return result;
1910}
1911
1912/* parse a non-top-level resource */
b75a7d8f 1913static struct SResource *
729e4ab9 1914parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1915{
1916 enum ETokenType token;
1917 enum EResourceType resType = RT_UNKNOWN;
73c04bcf 1918 ParseResourceFunction *parseFunction = NULL;
b75a7d8f
A
1919 struct UString *tokenValue;
1920 uint32_t startline;
1921 uint32_t line;
1922
4388f060 1923
729e4ab9 1924 token = getToken(state, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1925
1926 if(isVerbose()){
374ca955 1927 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1928 }
1929
1930 /* name . [ ':' type ] '{' resource '}' */
1931 /* This function parses from the colon onwards. If the colon is present, parse the
1932 type then try to parse a resource of that type. If there is no explicit type,
1933 work it out using the lookahead tokens. */
1934 switch (token)
1935 {
1936 case TOK_EOF:
1937 *status = U_INVALID_FORMAT_ERROR;
1938 error(startline, "Unexpected EOF encountered");
1939 return NULL;
1940
1941 case TOK_ERROR:
1942 *status = U_INVALID_FORMAT_ERROR;
1943 return NULL;
1944
1945 case TOK_COLON:
729e4ab9
A
1946 resType = parseResourceType(state, status);
1947 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1948
1949 if (U_FAILURE(*status))
1950 {
1951 return NULL;
1952 }
1953
1954 break;
1955
1956 case TOK_OPEN_BRACE:
1957 break;
1958
1959 default:
1960 *status = U_INVALID_FORMAT_ERROR;
1961 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1962 return NULL;
1963 }
1964
4388f060 1965
b75a7d8f
A
1966 if (resType == RT_UNKNOWN)
1967 {
1968 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1969 We could have any of the following:
1970 { { => array (nested)
1971 { :/} => array
1972 { string , => string array
1973
b75a7d8f
A
1974 { string { => table
1975
b75a7d8f
A
1976 { string :/{ => table
1977 { string } => string
1978 */
1979
729e4ab9 1980 token = peekToken(state, 0, NULL, &line, NULL,status);
b75a7d8f
A
1981
1982 if (U_FAILURE(*status))
1983 {
1984 return NULL;
1985 }
1986
b75a7d8f
A
1987 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1988 {
1989 resType = RT_ARRAY;
1990 }
1991 else if (token == TOK_STRING)
1992 {
729e4ab9 1993 token = peekToken(state, 1, NULL, &line, NULL, status);
b75a7d8f
A
1994
1995 if (U_FAILURE(*status))
1996 {
1997 return NULL;
1998 }
1999
2000 switch (token)
2001 {
2002 case TOK_COMMA: resType = RT_ARRAY; break;
2003 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
2004 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
b75a7d8f
A
2005 case TOK_COLON: resType = RT_TABLE; break;
2006 default:
2007 *status = U_INVALID_FORMAT_ERROR;
2008 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2009 return NULL;
2010 }
2011 }
2012 else
2013 {
2014 *status = U_INVALID_FORMAT_ERROR;
2015 error(line, "Unexpected token after '{'");
2016 return NULL;
2017 }
2018
2019 /* printf("Type guessed as %s\n", resourceNames[resType]); */
73c04bcf
A
2020 } else if(resType == RT_TABLE_NO_FALLBACK) {
2021 *status = U_INVALID_FORMAT_ERROR;
2022 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2023 return NULL;
b75a7d8f
A
2024 }
2025
4388f060 2026
b75a7d8f
A
2027 /* We should now know what we need to parse next, so call the appropriate parser
2028 function and return. */
73c04bcf
A
2029 parseFunction = gResourceTypes[resType].parseFunction;
2030 if (parseFunction != NULL) {
729e4ab9 2031 return parseFunction(state, tag, startline, comment, status);
73c04bcf
A
2032 }
2033 else {
b75a7d8f 2034 *status = U_INTERNAL_PROGRAM_ERROR;
73c04bcf 2035 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
b75a7d8f
A
2036 }
2037
2038 return NULL;
2039}
2040
73c04bcf 2041/* parse the top-level resource */
b75a7d8f 2042struct SRBRoot *
4388f060
A
2043parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation,
2044 UErrorCode *status)
b75a7d8f
A
2045{
2046 struct UString *tokenValue;
374ca955 2047 struct UString comment;
b75a7d8f 2048 uint32_t line;
b75a7d8f
A
2049 enum EResourceType bundleType;
2050 enum ETokenType token;
729e4ab9
A
2051 ParseState state;
2052 uint32_t i;
4388f060
A
2053
2054
729e4ab9
A
2055 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2056 {
2057 ustr_init(&state.lookahead[i].value);
2058 ustr_init(&state.lookahead[i].comment);
2059 }
b75a7d8f 2060
729e4ab9 2061 initLookahead(&state, buf, status);
b75a7d8f 2062
729e4ab9
A
2063 state.inputdir = inputDir;
2064 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2065 state.outputdir = outputDir;
2066 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
4388f060 2067 state.makeBinaryCollation = makeBinaryCollation;
374ca955
A
2068
2069 ustr_init(&comment);
729e4ab9 2070 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
b75a7d8f 2071
729e4ab9 2072 state.bundle = bundle_open(&comment, FALSE, status);
b75a7d8f 2073
729e4ab9 2074 if (state.bundle == NULL || U_FAILURE(*status))
b75a7d8f
A
2075 {
2076 return NULL;
2077 }
2078
374ca955 2079
729e4ab9
A
2080 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2081
b75a7d8f 2082 /* The following code is to make Empty bundle work no matter with :table specifer or not */
729e4ab9 2083 token = getToken(&state, NULL, NULL, &line, status);
73c04bcf 2084 if(token==TOK_COLON) {
b75a7d8f 2085 *status=U_ZERO_ERROR;
729e4ab9 2086 bundleType=parseResourceType(&state, status);
b75a7d8f 2087
73c04bcf 2088 if(isTable(bundleType))
b75a7d8f 2089 {
729e4ab9 2090 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
b75a7d8f
A
2091 }
2092 else
2093 {
2094 *status=U_PARSE_ERROR;
4388f060 2095 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
b75a7d8f
A
2096 }
2097 }
2098 else
2099 {
73c04bcf 2100 /* not a colon */
b75a7d8f
A
2101 if(token==TOK_OPEN_BRACE)
2102 {
2103 *status=U_ZERO_ERROR;
73c04bcf 2104 bundleType=RT_TABLE;
b75a7d8f
A
2105 }
2106 else
2107 {
73c04bcf
A
2108 /* neither colon nor open brace */
2109 *status=U_PARSE_ERROR;
2110 bundleType=RT_UNKNOWN;
b75a7d8f
A
2111 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2112 }
2113 }
b75a7d8f
A
2114
2115 if (U_FAILURE(*status))
2116 {
729e4ab9 2117 bundle_close(state.bundle, status);
b75a7d8f
A
2118 return NULL;
2119 }
2120
73c04bcf
A
2121 if(bundleType==RT_TABLE_NO_FALLBACK) {
2122 /*
2123 * Parse a top-level table with the table(nofallback) declaration.
2124 * This is the same as a regular table, but also sets the
2125 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2126 */
729e4ab9 2127 state.bundle->noFallback=TRUE;
73c04bcf
A
2128 }
2129 /* top-level tables need not handle special table names like "collations" */
729e4ab9 2130 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
73c04bcf 2131 if(dependencyArray!=NULL){
729e4ab9 2132 table_add(state.bundle->fRoot, dependencyArray, 0, status);
73c04bcf
A
2133 dependencyArray = NULL;
2134 }
4388f060 2135 if (U_FAILURE(*status))
b75a7d8f 2136 {
729e4ab9 2137 bundle_close(state.bundle, status);
46f4442e 2138 res_close(dependencyArray);
b75a7d8f
A
2139 return NULL;
2140 }
2141
729e4ab9 2142 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
b75a7d8f
A
2143 {
2144 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2145 if(isStrict()){
2146 *status = U_INVALID_FORMAT_ERROR;
2147 return NULL;
2148 }
2149 }
2150
729e4ab9 2151 cleanupLookahead(&state);
46f4442e 2152 ustr_deinit(&comment);
729e4ab9 2153 return state.bundle;
b75a7d8f 2154}