]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/parse.c
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.c
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
73c04bcf 4* Copyright (C) 1998-2006, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
9* File parse.c
10*
11* Modification History:
12*
13* Date Name Description
14* 05/26/99 stephen Creation.
15* 02/25/00 weiv Overhaul to write udata
16* 5/10/01 Ram removed ustdio dependency
17* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
21#include "ucol_imp.h"
22#include "parse.h"
23#include "errmsg.h"
24#include "uhash.h"
25#include "cmemory.h"
26#include "cstring.h"
374ca955 27#include "uinvchar.h"
b75a7d8f
A
28#include "read.h"
29#include "ustr.h"
30#include "reslist.h"
73c04bcf 31#include "rbt_pars.h"
b75a7d8f
A
32#include "unicode/ustring.h"
33#include "unicode/putil.h"
73c04bcf 34#include <stdio.h>
b75a7d8f
A
35
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Whitespace code points */
#define CR              0x000D  /* carriage return */
#define LF              0x000A  /* line feed */
#define SPACE           0x0020
#define TAB             0x0009

/* Punctuation the rule-file readers look for */
#define ESCAPE          0x005C  /* backslash */
#define HASH            0x0023  /* '#' */
#define QUOTE           0x0027  /* apostrophe */
#define ZERO            0x0030  /* digit '0' */

/* Square brackets; both pairs of names map to the same two code points */
#define STARTCOMMAND    0x005B
#define ENDCOMMAND      0x005D
#define OPENSQBRACKET   0x005B
#define CLOSESQBRACKET  0x005D
b75a7d8f 51
73c04bcf
A
52typedef struct SResource *
53ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
b75a7d8f
A
54
55struct Lookahead
56{
57 enum ETokenType type;
374ca955
A
58 struct UString value;
59 struct UString comment;
60 uint32_t line;
b75a7d8f
A
61};
62
63/* keep in sync with token defines in read.h */
374ca955 64const char *tokenNames[TOK_TOKEN_COUNT] =
b75a7d8f
A
65{
66 "string", /* A string token, such as "MonthNames" */
67 "'{'", /* An opening brace character */
68 "'}'", /* A closing brace character */
69 "','", /* A comma */
70 "':'", /* A colon */
71
72 "<end of file>", /* End of the file has been reached successfully */
374ca955 73 "<end of line>"
b75a7d8f
A
74};
75
76/* Just to store "TRUE" */
77static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
78
79static struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
80static uint32_t lookaheadPosition;
81static UCHARBUF *buffer;
82
83static struct SRBRoot *bundle;
84static const char *inputdir;
85static uint32_t inputdirLength;
73c04bcf
A
86static const char *outputdir;
87static uint32_t outputdirLength;
b75a7d8f
A
88
89static UBool gMakeBinaryCollation = TRUE;
90
374ca955 91static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
b75a7d8f 92
b75a7d8f
A
93/* The nature of the lookahead buffer:
94 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
95 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
96 When getToken is called, the current pointer is moved to the next slot and the
97 old slot is filled with the next token from the reader by calling getNextToken.
98 The token values are stored in the slot, which means that token values don't
99 survive a call to getToken, ie.
100
101 UString *value;
102
103 getToken(&value, NULL, status);
104 getToken(NULL, NULL, status); bad - value is now a different string
105*/
106static void
107initLookahead(UCHARBUF *buf, UErrorCode *status)
108{
109 static uint32_t initTypeStrings = 0;
110 uint32_t i;
111
112 if (!initTypeStrings)
113 {
114 initTypeStrings = 1;
115 }
116
117 lookaheadPosition = 0;
118 buffer = buf;
119
120 resetLineNumber();
121
122 for (i = 0; i < MAX_LOOKAHEAD; i++)
123 {
374ca955 124 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
b75a7d8f
A
125 if (U_FAILURE(*status))
126 {
127 return;
128 }
129 }
130
131 *status = U_ZERO_ERROR;
132}
133
134static enum ETokenType
374ca955 135getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
136{
137 enum ETokenType result;
138 uint32_t i;
139
140 result = lookahead[lookaheadPosition].type;
141
142 if (tokenValue != NULL)
143 {
144 *tokenValue = &lookahead[lookaheadPosition].value;
145 }
146
147 if (linenumber != NULL)
148 {
149 *linenumber = lookahead[lookaheadPosition].line;
150 }
151
374ca955
A
152 if (comment != NULL)
153 {
154 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
155 }
156
b75a7d8f
A
157 i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
158 lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
374ca955
A
159 ustr_setlen(&lookahead[i].comment, 0, status);
160 ustr_setlen(&lookahead[i].value, 0, status);
161 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
b75a7d8f
A
162
163 /* printf("getToken, returning %s\n", tokenNames[result]); */
164
165 return result;
166}
167
168static enum ETokenType
374ca955 169peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
b75a7d8f
A
170{
171 uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
172
173 if (U_FAILURE(*status))
174 {
175 return TOK_ERROR;
176 }
177
178 if (lookaheadCount >= MAX_LOOKAHEAD)
179 {
180 *status = U_INTERNAL_PROGRAM_ERROR;
181 return TOK_ERROR;
182 }
183
184 if (tokenValue != NULL)
185 {
186 *tokenValue = &lookahead[i].value;
187 }
188
189 if (linenumber != NULL)
190 {
191 *linenumber = lookahead[i].line;
192 }
193
374ca955
A
194 if(comment != NULL){
195 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
196 }
197
b75a7d8f
A
198 return lookahead[i].type;
199}
200
201static void
374ca955 202expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
203{
204 uint32_t line;
374ca955
A
205
206 enum ETokenType token = getToken(tokenValue, comment, &line, status);
b75a7d8f 207
73c04bcf 208 if (linenumber != NULL)
b75a7d8f 209 {
73c04bcf 210 *linenumber = line;
b75a7d8f
A
211 }
212
73c04bcf 213 if (U_FAILURE(*status))
b75a7d8f 214 {
73c04bcf 215 return;
b75a7d8f
A
216 }
217
218 if (token != expectedToken)
219 {
220 *status = U_INVALID_FORMAT_ERROR;
221 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
222 }
223 else /* "else" is added by Jing/GCL */
224 {
225 *status = U_ZERO_ERROR;
226 }
227}
228
374ca955 229static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status)
b75a7d8f
A
230{
231 struct UString *tokenValue;
232 char *result;
233 uint32_t count;
234
374ca955 235 expect(TOK_STRING, &tokenValue, comment, line, status);
b75a7d8f
A
236
237 if (U_FAILURE(*status))
238 {
239 return NULL;
240 }
241
374ca955
A
242 count = u_strlen(tokenValue->fChars);
243 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
244 *status = U_INVALID_FORMAT_ERROR;
245 error(*line, "invariant characters required for table keys, binary data, etc.");
246 return NULL;
247 }
248
249 result = uprv_malloc(count+1);
b75a7d8f
A
250
251 if (result == NULL)
252 {
253 *status = U_MEMORY_ALLOCATION_ERROR;
254 return NULL;
255 }
256
374ca955 257 u_UCharsToChars(tokenValue->fChars, result, count+1);
b75a7d8f
A
258 return result;
259}
260
b75a7d8f 261static struct SResource *
73c04bcf 262parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
263{
264 struct SResource *result = NULL;
265 struct UString *tokenValue;
266 FileStream *file = NULL;
267 char filename[256] = { '\0' };
268 char cs[128] = { '\0' };
269 uint32_t line;
270 int len=0;
271 UBool quoted = FALSE;
272 UCHARBUF *ucbuf=NULL;
273 UChar32 c = 0;
274 const char* cp = NULL;
275 UChar *pTarget = NULL;
276 UChar *target = NULL;
277 UChar *targetLimit = NULL;
278 int32_t size = 0;
279
73c04bcf 280 expect(TOK_STRING, &tokenValue, NULL, &line, status);
b75a7d8f
A
281
282 if(isVerbose()){
374ca955 283 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
284 }
285
286 if (U_FAILURE(*status))
287 {
288 return NULL;
289 }
290 /* make the filename including the directory */
291 if (inputdir != NULL)
292 {
293 uprv_strcat(filename, inputdir);
294
295 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
296 {
297 uprv_strcat(filename, U_FILE_SEP_STRING);
298 }
299 }
300
301 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
302
374ca955 303 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
304
305 if (U_FAILURE(*status))
306 {
307 return NULL;
308 }
b75a7d8f
A
309 uprv_strcat(filename, cs);
310
311
312 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
313
314 if (U_FAILURE(*status)) {
315 error(line, "An error occured while opening the input file %s\n", filename);
316 return NULL;
317 }
318
319 /* We allocate more space than actually required
320 * since the actual size needed for storing UChars
321 * is not known in UTF-8 byte stream
322 */
73c04bcf 323 size = ucbuf_size(ucbuf) + 1;
b75a7d8f 324 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
374ca955 325 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
b75a7d8f
A
326 target = pTarget;
327 targetLimit = pTarget+size;
328
329 /* read the rules into the buffer */
330 while (target < targetLimit)
331 {
332 c = ucbuf_getc(ucbuf, status);
333 if(c == QUOTE) {
73c04bcf 334 quoted = (UBool)!quoted;
b75a7d8f
A
335 }
336 /* weiv (06/26/2002): adding the following:
337 * - preserving spaces in commands [...]
338 * - # comments until the end of line
339 */
374ca955 340 if (c == STARTCOMMAND && !quoted)
b75a7d8f 341 {
73c04bcf
A
342 /* preserve commands
343 * closing bracket will be handled by the
344 * append at the end of the loop
345 */
346 while(c != ENDCOMMAND) {
347 U_APPEND_CHAR32(c, target,len);
348 c = ucbuf_getc(ucbuf, status);
349 }
350 }
351 else if (c == HASH && !quoted) {
352 /* skip comments */
353 while(c != CR && c != LF) {
354 c = ucbuf_getc(ucbuf, status);
355 }
356 continue;
357 }
358 else if (c == ESCAPE)
b75a7d8f
A
359 {
360 c = unescape(ucbuf, status);
361
362 if (c == U_ERR)
363 {
364 uprv_free(pTarget);
365 T_FileStream_close(file);
366 return NULL;
367 }
368 }
374ca955 369 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
b75a7d8f 370 {
73c04bcf
A
371 /* ignore spaces carriage returns
372 * and line feed unless in the form \uXXXX
b75a7d8f
A
373 */
374 continue;
375 }
376
377 /* Append UChar * after dissembling if c > 0xffff*/
378 if (c != U_EOF)
379 {
380 U_APPEND_CHAR32(c, target,len);
381 }
382 else
383 {
384 break;
385 }
386 }
387
374ca955
A
388 /* terminate the string */
389 if(target < targetLimit){
390 *target = 0x0000;
391 }
392
393 result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
394
b75a7d8f
A
395
396 ucbuf_close(ucbuf);
397 uprv_free(pTarget);
398 T_FileStream_close(file);
399
400 return result;
401}
402
73c04bcf
A
403static struct SResource *
404parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
405{
406 struct SResource *result = NULL;
407 struct UString *tokenValue;
408 FileStream *file = NULL;
409 char filename[256] = { '\0' };
410 char cs[128] = { '\0' };
411 uint32_t line;
412 UCHARBUF *ucbuf=NULL;
413 const char* cp = NULL;
414 UChar *pTarget = NULL;
415 const UChar *pSource = NULL;
416 int32_t size = 0;
417
418 expect(TOK_STRING, &tokenValue, NULL, &line, status);
419
420 if(isVerbose()){
421 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
422 }
423
424 if (U_FAILURE(*status))
425 {
426 return NULL;
427 }
428 /* make the filename including the directory */
429 if (inputdir != NULL)
430 {
431 uprv_strcat(filename, inputdir);
432
433 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
434 {
435 uprv_strcat(filename, U_FILE_SEP_STRING);
436 }
437 }
438
439 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
440
441 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
442
443 if (U_FAILURE(*status))
444 {
445 return NULL;
446 }
447 uprv_strcat(filename, cs);
448
449
450 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
451
452 if (U_FAILURE(*status)) {
453 error(line, "An error occured while opening the input file %s\n", filename);
454 return NULL;
455 }
456
457 /* We allocate more space than actually required
458 * since the actual size needed for storing UChars
459 * is not known in UTF-8 byte stream
460 */
461 pSource = ucbuf_getBuffer(ucbuf, &size, status);
462 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
463 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
464
465#if !UCONFIG_NO_TRANSLITERATION
466 size = utrans_stripRules(pSource, size, pTarget, status);
467#else
468 size = 0;
469 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
470#endif
471 result = string_open(bundle, tag, pTarget, size, NULL, status);
472
473 ucbuf_close(ucbuf);
474 uprv_free(pTarget);
475 T_FileStream_close(file);
476
477 return result;
478}
479static struct SResource* dependencyArray = NULL;
480
481static struct SResource *
482parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
483{
484 struct SResource *result = NULL;
485 struct SResource *elem = NULL;
486 struct UString *tokenValue;
487 uint32_t line;
488 char filename[256] = { '\0' };
489 char cs[128] = { '\0' };
490
491 expect(TOK_STRING, &tokenValue, NULL, &line, status);
492
493 if(isVerbose()){
494 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
495 }
496
497 if (U_FAILURE(*status))
498 {
499 return NULL;
500 }
501 /* make the filename including the directory */
502 if (outputdir != NULL)
503 {
504 uprv_strcat(filename, outputdir);
505
506 if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR)
507 {
508 uprv_strcat(filename, U_FILE_SEP_STRING);
509 }
510 }
511
512 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
513
514 if (U_FAILURE(*status))
515 {
516 return NULL;
517 }
518 uprv_strcat(filename, cs);
519 if(!T_FileStream_file_exists(filename)){
520 if(isStrict()){
521 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
522 }else{
523 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
524 }
525 }
526 if(dependencyArray==NULL){
527 dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status);
528 }
529 if(tag!=NULL){
530 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
531 }
532 elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
533
534 array_add(dependencyArray, elem, status);
535
536 if (U_FAILURE(*status))
537 {
538 return NULL;
539 }
540 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
541 return result;
542}
b75a7d8f 543static struct SResource *
374ca955 544parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
545{
546 struct UString *tokenValue;
547 struct SResource *result = NULL;
548
73c04bcf 549/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
b75a7d8f
A
550 {
551 return parseUCARules(tag, startline, status);
73c04bcf 552 }*/
b75a7d8f 553 if(isVerbose()){
374ca955 554 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 555 }
374ca955 556 expect(TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
557
558 if (U_SUCCESS(*status))
559 {
560 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
561 doesn't survive expect either) */
562
374ca955 563 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 564 if(U_SUCCESS(*status) && result) {
374ca955 565 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
566
567 if (U_FAILURE(*status))
568 {
569 string_close(result, status);
570 return NULL;
571 }
572 }
573 }
574
575 return result;
576}
577
578static struct SResource *
374ca955 579parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
580{
581 struct UString *tokenValue;
374ca955 582 struct SResource *result = NULL;
b75a7d8f 583
374ca955 584 expect(TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
585
586 if(isVerbose()){
374ca955 587 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
588 }
589
590 if (U_SUCCESS(*status))
591 {
592 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
593 doesn't survive expect either) */
594
374ca955 595 result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 596
374ca955 597 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
598
599 if (U_FAILURE(*status))
600 {
601 alias_close(result, status);
602 return NULL;
603 }
604 }
605
606 return result;
607}
608
609static struct SResource *
374ca955 610addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
b75a7d8f 611{
b75a7d8f
A
612 struct SResource *member = NULL;
613 struct UString *tokenValue;
374ca955 614 struct UString comment;
b75a7d8f
A
615 enum ETokenType token;
616 char subtag[1024];
617 UVersionInfo version;
618 UBool override = FALSE;
619 uint32_t line;
b75a7d8f 620 /* '{' . (name resource)* '}' */
374ca955
A
621 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
622
b75a7d8f
A
623 for (;;)
624 {
374ca955
A
625 ustr_init(&comment);
626 token = getToken(&tokenValue, &comment, &line, status);
b75a7d8f
A
627
628 if (token == TOK_CLOSE_BRACE)
629 {
630 return result;
631 }
632
633 if (token != TOK_STRING)
634 {
635 table_close(result, status);
636 *status = U_INVALID_FORMAT_ERROR;
637
638 if (token == TOK_EOF)
639 {
640 error(startline, "unterminated table");
641 }
642 else
643 {
644 error(line, "Unexpected token %s", tokenNames[token]);
645 }
646
647 return NULL;
648 }
649
650 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
651
652 if (U_FAILURE(*status))
653 {
654 table_close(result, status);
655 return NULL;
656 }
657
374ca955 658 member = parseResource(subtag, NULL, status);
b75a7d8f
A
659
660 if (U_FAILURE(*status))
661 {
662 table_close(result, status);
663 return NULL;
664 }
665
666 if (uprv_strcmp(subtag, "Version") == 0)
667 {
668 char ver[40];
669 int32_t length = member->u.fString.fLength;
670
671 if (length >= (int32_t) sizeof(ver))
672 {
673 length = (int32_t) sizeof(ver) - 1;
674 }
675
676 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
677 u_versionFromString(version, ver);
678
679 table_add(result, member, line, status);
680
681 }
682 else if (uprv_strcmp(subtag, "Override") == 0)
683 {
684 override = FALSE;
685
686 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
687 {
688 override = TRUE;
689 }
690 table_add(result, member, line, status);
691
692 }
693 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
694 {
695 /* discard duplicate %%CollationBin if any*/
696 }
697 else if (uprv_strcmp(subtag, "Sequence") == 0)
698 {
699#if UCONFIG_NO_COLLATION
700 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
701#else
702 /* first we add the "Sequence", so that we always have rules */
703 table_add(result, member, line, status);
704 if(gMakeBinaryCollation) {
705 UErrorCode intStatus = U_ZERO_ERROR;
706
707 /* do the collation elements */
708 int32_t len = 0;
709 uint8_t *data = NULL;
710 UCollator *coll = NULL;
711 UParseError parseError;
712 /* add sequence */
713 /*table_add(result, member, line, status);*/
714
715 coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
716 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
717
718 if (U_SUCCESS(intStatus) && coll != NULL)
719 {
374ca955
A
720 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
721 data = (uint8_t *)uprv_malloc(len);
73c04bcf 722 intStatus = U_ZERO_ERROR;
374ca955
A
723 len = ucol_cloneBinary(coll, data, len, &intStatus);
724 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
b75a7d8f
A
725
726 /* tailoring rules version */
727 /* This is wrong! */
728 /*coll->dataInfo.dataVersion[1] = version[0];*/
729 /* Copy tailoring version. Builder version already */
730 /* set in ucol_openRules */
731 ((UCATableHeader *)data)->version[1] = version[0];
732 ((UCATableHeader *)data)->version[2] = version[1];
733 ((UCATableHeader *)data)->version[3] = version[2];
734
735 if (U_SUCCESS(intStatus) && data != NULL)
736 {
374ca955 737 member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
b75a7d8f
A
738 /*table_add(bundle->fRoot, member, line, status);*/
739 table_add(result, member, line, status);
740 uprv_free(data);
741 }
742 else
743 {
744 warning(line, "could not obtain rules from collator");
745 if(isStrict()){
746 *status = U_INVALID_FORMAT_ERROR;
747 return NULL;
748 }
749 }
750
751 ucol_close(coll);
752 }
753 else
754 {
755 warning(line, "%%Collation could not be constructed from CollationElements - check context!");
756 if(isStrict()){
374ca955 757 *status = intStatus;
b75a7d8f
A
758 return NULL;
759 }
760 }
761 } else {
762 if(isVerbose()) {
763 printf("Not building Collation binary\n");
764 }
765 }
766#endif
767 }
768
769 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
770
771 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
772
773 if (U_FAILURE(*status))
774 {
775 table_close(result, status);
776 return NULL;
777 }
778 }
779
780 /* not reached */
781 /* A compiler warning will appear if all paths don't contain a return statement. */
782/* *status = U_INTERNAL_PROGRAM_ERROR;
783 return NULL;*/
784}
785
374ca955
A
786static struct SResource *
787parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
788{
789 struct SResource *result = NULL;
790 struct SResource *member = NULL;
791 struct SResource *collationRes = NULL;
792 struct UString *tokenValue;
793 struct UString comment;
794 enum ETokenType token;
795 char subtag[1024], typeKeyword[1024];
796 uint32_t line;
797
798 result = table_open(bundle, tag, NULL, status);
799
800 if (result == NULL || U_FAILURE(*status))
801 {
802 return NULL;
803 }
804 if(isVerbose()){
805 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
806 }
807 if(!newCollation) {
73c04bcf
A
808 return addCollation(result, startline, status);
809 }
810 else {
811 for(;;) {
812 ustr_init(&comment);
813 token = getToken(&tokenValue, &comment, &line, status);
374ca955 814
73c04bcf 815 if (token == TOK_CLOSE_BRACE)
374ca955 816 {
73c04bcf 817 return result;
374ca955 818 }
73c04bcf
A
819
820 if (token != TOK_STRING)
374ca955 821 {
73c04bcf
A
822 table_close(result, status);
823 *status = U_INVALID_FORMAT_ERROR;
824
825 if (token == TOK_EOF)
826 {
827 error(startline, "unterminated table");
828 }
829 else
830 {
831 error(line, "Unexpected token %s", tokenNames[token]);
832 }
833
834 return NULL;
374ca955
A
835 }
836
73c04bcf 837 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
374ca955 838
73c04bcf
A
839 if (U_FAILURE(*status))
840 {
841 table_close(result, status);
842 return NULL;
843 }
374ca955 844
73c04bcf
A
845 if (uprv_strcmp(subtag, "default") == 0)
846 {
847 member = parseResource(subtag, NULL, status);
374ca955 848
73c04bcf
A
849 if (U_FAILURE(*status))
850 {
851 table_close(result, status);
852 return NULL;
853 }
374ca955 854
73c04bcf
A
855 table_add(result, member, line, status);
856 }
857 else
858 {
859 token = peekToken(0, &tokenValue, &line, &comment, status);
860 /* this probably needs to be refactored or recursively use the parser */
861 /* first we assume that our collation table won't have the explicit type */
862 /* then, we cannot handle aliases */
863 if(token == TOK_OPEN_BRACE) {
864 token = getToken(&tokenValue, &comment, &line, status);
865 collationRes = table_open(bundle, subtag, NULL, status);
866 table_add(result, addCollation(collationRes, startline, status), startline, status);
867 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
868 /* we could have a table too */
869 token = peekToken(1, &tokenValue, &line, &comment, status);
870 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
871 if(uprv_strcmp(typeKeyword, "alias") == 0) {
872 member = parseResource(subtag, NULL, status);
873
874 if (U_FAILURE(*status))
875 {
876 table_close(result, status);
877 return NULL;
878 }
374ca955 879
73c04bcf
A
880 table_add(result, member, line, status);
881 } else {
882 *status = U_INVALID_FORMAT_ERROR;
883 return NULL;
884 }
885 } else {
886 *status = U_INVALID_FORMAT_ERROR;
887 return NULL;
888 }
374ca955 889 }
374ca955 890
73c04bcf 891 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
374ca955 892
73c04bcf 893 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
374ca955 894
73c04bcf
A
895 if (U_FAILURE(*status))
896 {
897 table_close(result, status);
898 return NULL;
899 }
374ca955 900 }
374ca955
A
901 }
902}
903
b75a7d8f
A
904/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
905 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
906static struct SResource *
907realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
908{
909 struct SResource *member = NULL;
910 struct UString *tokenValue=NULL;
374ca955 911 struct UString comment;
b75a7d8f
A
912 enum ETokenType token;
913 char subtag[1024];
914 uint32_t line;
915 UBool readToken = FALSE;
916
917 /* '{' . (name resource)* '}' */
918 if(isVerbose()){
374ca955 919 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
920 }
921 for (;;)
922 {
374ca955
A
923 ustr_init(&comment);
924 token = getToken(&tokenValue, &comment, &line, status);
b75a7d8f
A
925
926 if (token == TOK_CLOSE_BRACE)
927 {
928 if (!readToken) {
929 warning(startline, "Encountered empty table");
930 }
931 return table;
932 }
933
934 if (token != TOK_STRING)
935 {
936 table_close(table, status);
937 *status = U_INVALID_FORMAT_ERROR;
938
939 if (token == TOK_EOF)
940 {
941 error(startline, "unterminated table");
942 }
943 else
944 {
374ca955 945 error(line, "unexpected token %s", tokenNames[token]);
b75a7d8f
A
946 }
947
948 return NULL;
949 }
950
374ca955
A
951 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
952 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
953 } else {
954 *status = U_INVALID_FORMAT_ERROR;
955 error(line, "invariant characters required for table keys");
956 table_close(table, status);
957 return NULL;
958 }
b75a7d8f
A
959
960 if (U_FAILURE(*status))
961 {
962 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
963 table_close(table, status);
964 return NULL;
965 }
966
374ca955 967 member = parseResource(subtag, &comment, status);
b75a7d8f
A
968
969 if (member == NULL || U_FAILURE(*status))
970 {
971 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
972 table_close(table, status);
973 return NULL;
974 }
975
976 table_add(table, member, line, status);
977
978 if (U_FAILURE(*status))
979 {
980 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
981 table_close(table, status);
982 return NULL;
983 }
984 readToken = TRUE;
985 }
986
987 /* not reached */
988 /* A compiler warning will appear if all paths don't contain a return statement. */
989/* *status = U_INTERNAL_PROGRAM_ERROR;
990 return NULL;*/
991}
992
993static struct SResource *
374ca955 994parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
995{
996 struct SResource *result;
997
998 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
999 {
374ca955
A
1000 return parseCollationElements(tag, startline, FALSE, status);
1001 }
1002 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1003 {
1004 return parseCollationElements(tag, startline, TRUE, status);
b75a7d8f
A
1005 }
1006 if(isVerbose()){
374ca955 1007 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1008 }
374ca955
A
1009
1010 result = table_open(bundle, tag, comment, status);
b75a7d8f
A
1011
1012 if (result == NULL || U_FAILURE(*status))
1013 {
1014 return NULL;
1015 }
1016
374ca955 1017 return realParseTable(result, tag, startline, status);
b75a7d8f
A
1018}
1019
1020static struct SResource *
374ca955 1021parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1022{
1023 struct SResource *result = NULL;
1024 struct SResource *member = NULL;
1025 struct UString *tokenValue;
374ca955 1026 struct UString memberComments;
b75a7d8f
A
1027 enum ETokenType token;
1028 UBool readToken = FALSE;
1029
374ca955 1030 result = array_open(bundle, tag, comment, status);
b75a7d8f
A
1031
1032 if (result == NULL || U_FAILURE(*status))
1033 {
1034 return NULL;
1035 }
1036 if(isVerbose()){
374ca955 1037 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1038 }
374ca955
A
1039
1040 ustr_init(&memberComments);
1041
b75a7d8f
A
1042 /* '{' . resource [','] '}' */
1043 for (;;)
1044 {
374ca955
A
1045 /* reset length */
1046 ustr_setlen(&memberComments, 0, status);
1047
b75a7d8f 1048 /* check for end of array, but don't consume next token unless it really is the end */
374ca955
A
1049 token = peekToken(0, &tokenValue, NULL, &memberComments, status);
1050
b75a7d8f
A
1051
1052 if (token == TOK_CLOSE_BRACE)
1053 {
374ca955 1054 getToken(NULL, NULL, NULL, status);
b75a7d8f
A
1055 if (!readToken) {
1056 warning(startline, "Encountered empty array");
1057 }
1058 break;
1059 }
1060
1061 if (token == TOK_EOF)
1062 {
1063 array_close(result, status);
1064 *status = U_INVALID_FORMAT_ERROR;
1065 error(startline, "unterminated array");
1066 return NULL;
1067 }
1068
1069 /* string arrays are a special case */
1070 if (token == TOK_STRING)
1071 {
374ca955
A
1072 getToken(&tokenValue, &memberComments, NULL, status);
1073 member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
b75a7d8f
A
1074 }
1075 else
1076 {
374ca955 1077 member = parseResource(NULL, &memberComments, status);
b75a7d8f
A
1078 }
1079
1080 if (member == NULL || U_FAILURE(*status))
1081 {
1082 array_close(result, status);
1083 return NULL;
1084 }
1085
1086 array_add(result, member, status);
1087
1088 if (U_FAILURE(*status))
1089 {
1090 array_close(result, status);
1091 return NULL;
1092 }
1093
1094 /* eat optional comma if present */
374ca955 1095 token = peekToken(0, NULL, NULL, NULL, status);
b75a7d8f
A
1096
1097 if (token == TOK_COMMA)
1098 {
374ca955 1099 getToken(NULL, NULL, NULL, status);
b75a7d8f
A
1100 }
1101
1102 if (U_FAILURE(*status))
1103 {
1104 array_close(result, status);
1105 return NULL;
1106 }
1107 readToken = TRUE;
1108 }
1109
1110 return result;
1111}
1112
1113static struct SResource *
374ca955 1114parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1115{
1116 struct SResource *result = NULL;
1117 enum ETokenType token;
1118 char *string;
1119 int32_t value;
1120 UBool readToken = FALSE;
1121 /* added by Jing/GCL */
1122 char *stopstring;
1123 uint32_t len;
374ca955 1124 struct UString memberComments;
b75a7d8f 1125
374ca955 1126 result = intvector_open(bundle, tag, comment, status);
b75a7d8f
A
1127
1128 if (result == NULL || U_FAILURE(*status))
1129 {
1130 return NULL;
1131 }
1132
1133 if(isVerbose()){
374ca955 1134 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1135 }
374ca955 1136 ustr_init(&memberComments);
b75a7d8f
A
1137 /* '{' . string [','] '}' */
1138 for (;;)
1139 {
374ca955
A
1140 ustr_setlen(&memberComments, 0, status);
1141
b75a7d8f 1142 /* check for end of array, but don't consume next token unless it really is the end */
374ca955 1143 token = peekToken(0, NULL, NULL,&memberComments, status);
b75a7d8f
A
1144
1145 if (token == TOK_CLOSE_BRACE)
1146 {
1147 /* it's the end, consume the close brace */
374ca955 1148 getToken(NULL, NULL, NULL, status);
b75a7d8f
A
1149 if (!readToken) {
1150 warning(startline, "Encountered empty int vector");
1151 }
1152 return result;
1153 }
1154
374ca955 1155 string = getInvariantString(NULL, NULL, status);
b75a7d8f
A
1156
1157 if (U_FAILURE(*status))
1158 {
1159 intvector_close(result, status);
1160 return NULL;
1161 }
1162 /* Commented by Jing/GCL */
1163 /*value = uprv_strtol(string, NULL, 10);
1164 intvector_add(result, value, status);
1165
1166 uprv_free(string);
1167
1168 token = peekToken(0, NULL, NULL, status);*/
1169
1170 /* The following is added by Jing/GCL to handle illegal char in the Intvector */
1171 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
374ca955 1172 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1173
1174 if(len==uprv_strlen(string))
1175 {
1176 intvector_add(result, value, status);
1177 uprv_free(string);
374ca955 1178 token = peekToken(0, NULL, NULL, NULL, status);
b75a7d8f
A
1179 }
1180 else
1181 {
1182 uprv_free(string);
1183 *status=U_INVALID_CHAR_FOUND;
1184 }
1185 /* The above is added by Jing/GCL */
1186
1187 if (U_FAILURE(*status))
1188 {
1189 intvector_close(result, status);
1190 return NULL;
1191 }
1192
1193 /* the comma is optional (even though it is required to prevent the reader from concatenating
1194 consecutive entries) so that a missing comma on the last entry isn't an error */
1195 if (token == TOK_COMMA)
1196 {
374ca955 1197 getToken(NULL, NULL, NULL, status);
b75a7d8f
A
1198 }
1199 readToken = TRUE;
1200 }
1201
1202 /* not reached */
1203 /* A compiler warning will appear if all paths don't contain a return statement. */
1204/* intvector_close(result, status);
1205 *status = U_INTERNAL_PROGRAM_ERROR;
1206 return NULL;*/
1207}
1208
1209static struct SResource *
374ca955 1210parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1211{
1212 struct SResource *result = NULL;
1213 uint8_t *value;
1214 char *string;
1215 char toConv[3] = {'\0', '\0', '\0'};
1216 uint32_t count;
1217 uint32_t i;
1218 uint32_t line;
1219 /* added by Jing/GCL */
1220 char *stopstring;
1221 uint32_t len;
1222
374ca955 1223 string = getInvariantString(&line, NULL, status);
b75a7d8f
A
1224
1225 if (string == NULL || U_FAILURE(*status))
1226 {
1227 return NULL;
1228 }
1229
374ca955 1230 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1231
1232 if (U_FAILURE(*status))
1233 {
1234 uprv_free(string);
1235 return NULL;
1236 }
1237
1238 if(isVerbose()){
374ca955 1239 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1240 }
1241
374ca955 1242 count = (uint32_t)uprv_strlen(string);
b75a7d8f
A
1243 if (count > 0){
1244 if((count % 2)==0){
1245 value = uprv_malloc(sizeof(uint8_t) * count);
1246
1247 if (value == NULL)
1248 {
1249 uprv_free(string);
1250 *status = U_MEMORY_ALLOCATION_ERROR;
1251 return NULL;
1252 }
1253
1254 for (i = 0; i < count; i += 2)
1255 {
1256 toConv[0] = string[i];
1257 toConv[1] = string[i + 1];
1258
1259 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
374ca955 1260 len=(uint32_t)(stopstring-toConv);
b75a7d8f
A
1261
1262 if(len!=uprv_strlen(toConv))
1263 {
1264 uprv_free(string);
1265 *status=U_INVALID_CHAR_FOUND;
1266 return NULL;
1267 }
1268 }
1269
374ca955 1270 result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status);
b75a7d8f
A
1271
1272 uprv_free(value);
1273 }
1274 else
1275 {
1276 *status = U_INVALID_CHAR_FOUND;
1277 uprv_free(string);
1278 error(line, "Encountered invalid binary string");
1279 return NULL;
1280 }
1281 }
1282 else
1283 {
374ca955 1284 result = bin_open(bundle, tag, 0, NULL, "",comment,status);
b75a7d8f
A
1285 warning(startline, "Encountered empty binary tag");
1286 }
1287 uprv_free(string);
1288
1289 return result;
1290}
1291
1292static struct SResource *
374ca955 1293parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1294{
1295 struct SResource *result = NULL;
1296 int32_t value;
1297 char *string;
1298 /* added by Jing/GCL */
1299 char *stopstring;
1300 uint32_t len;
1301
374ca955 1302 string = getInvariantString(NULL, NULL, status);
b75a7d8f
A
1303
1304 if (string == NULL || U_FAILURE(*status))
1305 {
1306 return NULL;
1307 }
1308
374ca955 1309 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1310
1311 if (U_FAILURE(*status))
1312 {
1313 uprv_free(string);
1314 return NULL;
1315 }
1316
1317 if(isVerbose()){
374ca955 1318 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1319 }
1320
1321 if (uprv_strlen(string) <= 0)
1322 {
1323 warning(startline, "Encountered empty integer. Default value is 0.");
1324 }
1325
1326 /* commented by Jing/GCL */
1327 /* value = uprv_strtol(string, NULL, 10);*/
1328 /* result = int_open(bundle, tag, value, status);*/
1329 /* The following is added by Jing/GCL*/
1330 /* to make integer support hexdecimal, octal digit and decimal*/
1331 /* to handle illegal char in the integer*/
1332 value = uprv_strtoul(string, &stopstring, 0);
374ca955 1333 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1334 if(len==uprv_strlen(string))
1335 {
374ca955 1336 result = int_open(bundle, tag, value, comment, status);
b75a7d8f
A
1337 }
1338 else
1339 {
1340 *status=U_INVALID_CHAR_FOUND;
1341 }
1342 uprv_free(string);
1343
1344 return result;
1345}
1346
1347static struct SResource *
374ca955 1348parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1349{
1350 struct SResource *result;
1351 FileStream *file;
1352 int32_t len;
1353 uint8_t *data;
1354 char *filename;
1355 uint32_t line;
1356 char *fullname = NULL;
1357 int32_t numRead = 0;
374ca955 1358 filename = getInvariantString(&line, NULL, status);
b75a7d8f
A
1359
1360 if (U_FAILURE(*status))
1361 {
1362 return NULL;
1363 }
1364
374ca955 1365 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1366
1367 if (U_FAILURE(*status))
1368 {
1369 uprv_free(filename);
1370 return NULL;
1371 }
1372
1373 if(isVerbose()){
374ca955 1374 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1375 }
1376
1377 /* Open the input file for reading */
1378 if (inputdir == NULL)
1379 {
1380 file = T_FileStream_open(filename, "rb");
1381 }
1382 else
1383 {
1384
374ca955 1385 int32_t count = (int32_t)uprv_strlen(filename);
b75a7d8f
A
1386
1387 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1388 {
1389 fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1390
1391 /* test for NULL */
1392 if(fullname == NULL)
1393 {
1394 *status = U_MEMORY_ALLOCATION_ERROR;
1395 return NULL;
1396 }
1397
1398 uprv_strcpy(fullname, inputdir);
1399
1400 fullname[inputdirLength] = U_FILE_SEP_CHAR;
1401 fullname[inputdirLength + 1] = '\0';
1402
1403 uprv_strcat(fullname, filename);
1404 }
1405 else
1406 {
1407 fullname = (char *) uprv_malloc(inputdirLength + count + 1);
1408
1409 /* test for NULL */
1410 if(fullname == NULL)
1411 {
1412 *status = U_MEMORY_ALLOCATION_ERROR;
1413 return NULL;
1414 }
1415
1416 uprv_strcpy(fullname, inputdir);
1417 uprv_strcat(fullname, filename);
1418 }
1419
1420 file = T_FileStream_open(fullname, "rb");
1421
1422 }
1423
1424 if (file == NULL)
1425 {
1426 error(line, "couldn't open input file %s", filename);
1427 *status = U_FILE_ACCESS_ERROR;
1428 return NULL;
1429 }
1430
1431 len = T_FileStream_size(file);
1432 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1433 /* test for NULL */
1434 if(data == NULL)
1435 {
1436 *status = U_MEMORY_ALLOCATION_ERROR;
1437 T_FileStream_close (file);
1438 return NULL;
1439 }
1440
1441 numRead = T_FileStream_read (file, data, len);
1442 T_FileStream_close (file);
1443
374ca955 1444 result = bin_open(bundle, tag, len, data, fullname, comment, status);
b75a7d8f
A
1445
1446 uprv_free(data);
1447 uprv_free(filename);
1448 uprv_free(fullname);
1449
1450 return result;
1451}
1452
1453static struct SResource *
374ca955 1454parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1455{
1456 struct SResource *result;
1457 int32_t len=0;
1458 char *filename;
1459 uint32_t line;
1460 UChar *pTarget = NULL;
1461
1462 UCHARBUF *ucbuf;
1463 char *fullname = NULL;
1464 int32_t count = 0;
1465 const char* cp = NULL;
1466 const UChar* uBuffer = NULL;
1467
374ca955
A
1468 filename = getInvariantString(&line, NULL, status);
1469 count = (int32_t)uprv_strlen(filename);
b75a7d8f
A
1470
1471 if (U_FAILURE(*status))
1472 {
1473 return NULL;
1474 }
1475
374ca955 1476 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1477
1478 if (U_FAILURE(*status))
1479 {
1480 uprv_free(filename);
1481 return NULL;
1482 }
1483
1484 if(isVerbose()){
374ca955 1485 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1486 }
1487
1488 fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1489 /* test for NULL */
1490 if(fullname == NULL)
1491 {
1492 *status = U_MEMORY_ALLOCATION_ERROR;
1493 uprv_free(filename);
1494 return NULL;
374ca955 1495 }
b75a7d8f
A
1496
1497 if(inputdir!=NULL){
1498 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1499 {
1500
1501 uprv_strcpy(fullname, inputdir);
1502
1503 fullname[inputdirLength] = U_FILE_SEP_CHAR;
1504 fullname[inputdirLength + 1] = '\0';
1505
1506 uprv_strcat(fullname, filename);
1507 }
1508 else
1509 {
1510 uprv_strcpy(fullname, inputdir);
1511 uprv_strcat(fullname, filename);
1512 }
1513 }else{
1514 uprv_strcpy(fullname,filename);
1515 }
1516
1517 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1518
1519 if (U_FAILURE(*status)) {
1520 error(line, "couldn't open input file %s\n", filename);
1521 return NULL;
1522 }
1523
1524 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
374ca955 1525 result = string_open(bundle, tag, uBuffer, len, comment, status);
b75a7d8f
A
1526
1527 uprv_free(pTarget);
1528
1529 uprv_free(filename);
1530 uprv_free(fullname);
1531
1532 return result;
1533}
1534
73c04bcf
A
1535
1536
1537
1538
1539U_STRING_DECL(k_type_string, "string", 6);
1540U_STRING_DECL(k_type_binary, "binary", 6);
1541U_STRING_DECL(k_type_bin, "bin", 3);
1542U_STRING_DECL(k_type_table, "table", 5);
1543U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1544U_STRING_DECL(k_type_int, "int", 3);
1545U_STRING_DECL(k_type_integer, "integer", 7);
1546U_STRING_DECL(k_type_array, "array", 5);
1547U_STRING_DECL(k_type_alias, "alias", 5);
1548U_STRING_DECL(k_type_intvector, "intvector", 9);
1549U_STRING_DECL(k_type_import, "import", 6);
1550U_STRING_DECL(k_type_include, "include", 7);
1551U_STRING_DECL(k_type_reserved, "reserved", 8);
1552
1553/* Various non-standard processing plugins that create one or more special resources. */
1554U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1555U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1556U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1557U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1558
/*
 * Resource types known to the parser. The enumerators are used as indexes
 * into gResourceTypes[], so both must list the types in the same order.
 */
typedef enum EResourceType
{
    RT_UNKNOWN = 0,             /* no type determined (yet) */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* rejected below the top bundle level */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker for the type-name search */
} EResourceType;
1578
1579static struct {
1580 const char *nameChars; /* only used for debugging */
1581 const UChar *nameUChars;
1582 ParseResourceFunction *parseFunction;
1583} gResourceTypes[] = {
1584 {"Unknown", NULL, NULL},
1585 {"string", k_type_string, parseString},
1586 {"binary", k_type_binary, parseBinary},
1587 {"table", k_type_table, parseTable},
1588 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1589 {"integer", k_type_integer, parseInteger},
1590 {"array", k_type_array, parseArray},
1591 {"alias", k_type_alias, parseAlias},
1592 {"intvector", k_type_intvector, parseIntVector},
1593 {"import", k_type_import, parseImport},
1594 {"include", k_type_include, parseInclude},
1595 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1596 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1597 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1598 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1599 {"reserved", NULL, NULL}
1600};
1601
1602void initParser(UBool makeBinaryCollation)
1603{
1604 uint32_t i;
1605
1606 U_STRING_INIT(k_type_string, "string", 6);
1607 U_STRING_INIT(k_type_binary, "binary", 6);
1608 U_STRING_INIT(k_type_bin, "bin", 3);
1609 U_STRING_INIT(k_type_table, "table", 5);
1610 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1611 U_STRING_INIT(k_type_int, "int", 3);
1612 U_STRING_INIT(k_type_integer, "integer", 7);
1613 U_STRING_INIT(k_type_array, "array", 5);
1614 U_STRING_INIT(k_type_alias, "alias", 5);
1615 U_STRING_INIT(k_type_intvector, "intvector", 9);
1616 U_STRING_INIT(k_type_import, "import", 6);
1617 U_STRING_INIT(k_type_reserved, "reserved", 8);
1618 U_STRING_INIT(k_type_include, "include", 7);
1619
1620 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1621 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1622 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1623 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1624
1625 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1626 {
1627 ustr_init(&lookahead[i].value);
1628 }
1629 gMakeBinaryCollation = makeBinaryCollation;
1630}
1631
1632static U_INLINE UBool isTable(enum EResourceType type) {
1633 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1634}
1635
1636static enum EResourceType
1637parseResourceType(UErrorCode *status)
1638{
1639 struct UString *tokenValue;
1640 struct UString comment;
1641 enum EResourceType result = RT_UNKNOWN;
1642 uint32_t line=0;
1643 ustr_init(&comment);
1644 expect(TOK_STRING, &tokenValue, &comment, &line, status);
1645
1646 if (U_FAILURE(*status))
1647 {
1648 return RT_UNKNOWN;
1649 }
1650
1651 *status = U_ZERO_ERROR;
1652
1653 /* Search for normal types */
1654 result=RT_UNKNOWN;
1655 while (++result < RT_RESERVED) {
1656 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1657 break;
1658 }
1659 }
1660 /* Now search for the aliases */
1661 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1662 result = RT_INTEGER;
1663 }
1664 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1665 result = RT_BINARY;
1666 }
1667 else if (result == RT_RESERVED) {
1668 char tokenBuffer[1024];
1669 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1670 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1671 *status = U_INVALID_FORMAT_ERROR;
1672 error(line, "unknown resource type '%s'", tokenBuffer);
1673 }
1674
1675 return result;
1676}
1677
1678/* parse a non-top-level resource */
b75a7d8f 1679static struct SResource *
374ca955 1680parseResource(char *tag, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1681{
1682 enum ETokenType token;
1683 enum EResourceType resType = RT_UNKNOWN;
73c04bcf 1684 ParseResourceFunction *parseFunction = NULL;
b75a7d8f
A
1685 struct UString *tokenValue;
1686 uint32_t startline;
1687 uint32_t line;
1688
374ca955 1689 token = getToken(&tokenValue, NULL, &startline, status);
b75a7d8f
A
1690
1691 if(isVerbose()){
374ca955 1692 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1693 }
1694
1695 /* name . [ ':' type ] '{' resource '}' */
1696 /* This function parses from the colon onwards. If the colon is present, parse the
1697 type then try to parse a resource of that type. If there is no explicit type,
1698 work it out using the lookahead tokens. */
1699 switch (token)
1700 {
1701 case TOK_EOF:
1702 *status = U_INVALID_FORMAT_ERROR;
1703 error(startline, "Unexpected EOF encountered");
1704 return NULL;
1705
1706 case TOK_ERROR:
1707 *status = U_INVALID_FORMAT_ERROR;
1708 return NULL;
1709
1710 case TOK_COLON:
1711 resType = parseResourceType(status);
374ca955 1712 expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1713
1714 if (U_FAILURE(*status))
1715 {
1716 return NULL;
1717 }
1718
1719 break;
1720
1721 case TOK_OPEN_BRACE:
1722 break;
1723
1724 default:
1725 *status = U_INVALID_FORMAT_ERROR;
1726 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1727 return NULL;
1728 }
1729
1730 if (resType == RT_UNKNOWN)
1731 {
1732 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1733 We could have any of the following:
1734 { { => array (nested)
1735 { :/} => array
1736 { string , => string array
1737
1738 commented by Jing/GCL
1739 { string { => table
1740
1741 added by Jing/GCL
1742
1743 { string :/{ => table
1744 { string } => string
1745 */
1746
374ca955 1747 token = peekToken(0, NULL, &line, NULL,status);
b75a7d8f
A
1748
1749 if (U_FAILURE(*status))
1750 {
1751 return NULL;
1752 }
1753
1754 /* Commented by Jing/GCL */
1755 /* if (token == TOK_OPEN_BRACE || token == TOK_COLON )*/
1756 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1757 {
1758 resType = RT_ARRAY;
1759 }
1760 else if (token == TOK_STRING)
1761 {
374ca955 1762 token = peekToken(1, NULL, &line, NULL, status);
b75a7d8f
A
1763
1764 if (U_FAILURE(*status))
1765 {
1766 return NULL;
1767 }
1768
1769 switch (token)
1770 {
1771 case TOK_COMMA: resType = RT_ARRAY; break;
1772 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
1773 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
1774 /* added by Jing/GCL to make table work when :table is omitted */
1775 case TOK_COLON: resType = RT_TABLE; break;
1776 default:
1777 *status = U_INVALID_FORMAT_ERROR;
1778 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1779 return NULL;
1780 }
1781 }
1782 else
1783 {
1784 *status = U_INVALID_FORMAT_ERROR;
1785 error(line, "Unexpected token after '{'");
1786 return NULL;
1787 }
1788
1789 /* printf("Type guessed as %s\n", resourceNames[resType]); */
73c04bcf
A
1790 } else if(resType == RT_TABLE_NO_FALLBACK) {
1791 *status = U_INVALID_FORMAT_ERROR;
1792 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1793 return NULL;
b75a7d8f
A
1794 }
1795
1796 /* We should now know what we need to parse next, so call the appropriate parser
1797 function and return. */
73c04bcf
A
1798 parseFunction = gResourceTypes[resType].parseFunction;
1799 if (parseFunction != NULL) {
1800 return parseFunction(tag, startline, comment, status);
1801 }
1802 else {
b75a7d8f 1803 *status = U_INTERNAL_PROGRAM_ERROR;
73c04bcf 1804 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
b75a7d8f
A
1805 }
1806
1807 return NULL;
1808}
1809
73c04bcf 1810/* parse the top-level resource */
b75a7d8f 1811struct SRBRoot *
73c04bcf 1812parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
b75a7d8f
A
1813{
1814 struct UString *tokenValue;
374ca955 1815 struct UString comment;
b75a7d8f
A
1816 uint32_t line;
1817 /* added by Jing/GCL */
1818 enum EResourceType bundleType;
1819 enum ETokenType token;
1820
1821 initLookahead(buf, status);
1822
73c04bcf 1823 inputdir = inputDir;
374ca955 1824 inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0;
73c04bcf
A
1825 outputdir = outputDir;
1826 outputdirLength = (outputdir != NULL) ? (uint32_t)uprv_strlen(outputdir) : 0;
374ca955
A
1827
1828 ustr_init(&comment);
1829 expect(TOK_STRING, &tokenValue, &comment, NULL, status);
b75a7d8f 1830
374ca955 1831 bundle = bundle_open(&comment, status);
b75a7d8f
A
1832
1833 if (bundle == NULL || U_FAILURE(*status))
1834 {
1835 return NULL;
1836 }
1837
374ca955 1838
b75a7d8f
A
1839 bundle_setlocale(bundle, tokenValue->fChars, status);
1840 /* Commented by Jing/GCL */
1841 /* expect(TOK_OPEN_BRACE, NULL, &line, status); */
1842 /* The following code is to make Empty bundle work no matter with :table specifer or not */
374ca955 1843 token = getToken(NULL, NULL, &line, status);
73c04bcf 1844 if(token==TOK_COLON) {
b75a7d8f 1845 *status=U_ZERO_ERROR;
b75a7d8f
A
1846 bundleType=parseResourceType(status);
1847
73c04bcf 1848 if(isTable(bundleType))
b75a7d8f 1849 {
374ca955 1850 expect(TOK_OPEN_BRACE, NULL, NULL, &line, status);
b75a7d8f
A
1851 }
1852 else
1853 {
1854 *status=U_PARSE_ERROR;
1855 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
1856 }
1857 }
1858 else
1859 {
73c04bcf 1860 /* not a colon */
b75a7d8f
A
1861 if(token==TOK_OPEN_BRACE)
1862 {
1863 *status=U_ZERO_ERROR;
73c04bcf 1864 bundleType=RT_TABLE;
b75a7d8f
A
1865 }
1866 else
1867 {
73c04bcf
A
1868 /* neither colon nor open brace */
1869 *status=U_PARSE_ERROR;
1870 bundleType=RT_UNKNOWN;
b75a7d8f
A
1871 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
1872 }
1873 }
1874 /* The above is added by Jing/GCL */
1875
1876 if (U_FAILURE(*status))
1877 {
1878 bundle_close(bundle, status);
1879 return NULL;
1880 }
1881
73c04bcf
A
1882 if(bundleType==RT_TABLE_NO_FALLBACK) {
1883 /*
1884 * Parse a top-level table with the table(nofallback) declaration.
1885 * This is the same as a regular table, but also sets the
1886 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
1887 */
1888 bundle->noFallback=TRUE;
1889 }
1890 /* top-level tables need not handle special table names like "collations" */
b75a7d8f 1891 realParseTable(bundle->fRoot, NULL, line, status);
73c04bcf
A
1892
1893 if(dependencyArray!=NULL){
1894 table_add(bundle->fRoot, dependencyArray, 0, status);
1895 dependencyArray = NULL;
1896 }
b75a7d8f
A
1897 if (U_FAILURE(*status))
1898 {
1899 bundle_close(bundle, status);
73c04bcf 1900 array_close(dependencyArray, status);
b75a7d8f
A
1901 return NULL;
1902 }
1903
374ca955 1904 if (getToken(NULL, NULL, &line, status) != TOK_EOF)
b75a7d8f
A
1905 {
1906 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
1907 if(isStrict()){
1908 *status = U_INVALID_FORMAT_ERROR;
1909 return NULL;
1910 }
1911 }
1912
1913 return bundle;
1914}