]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/tools/genrb/parse.c
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.c
... / ...
CommitLineData
1/*
2*******************************************************************************
3*
4* Copyright (C) 1998-2006, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
9* File parse.c
10*
11* Modification History:
12*
13* Date Name Description
14* 05/26/99 stephen Creation.
15* 02/25/00 weiv Overhaul to write udata
16* 5/10/01 Ram removed ustdio dependency
17* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
21#include "ucol_imp.h"
22#include "parse.h"
23#include "errmsg.h"
24#include "uhash.h"
25#include "cmemory.h"
26#include "cstring.h"
27#include "uinvchar.h"
28#include "read.h"
29#include "ustr.h"
30#include "reslist.h"
31#include "rbt_pars.h"
32#include "unicode/ustring.h"
33#include "unicode/putil.h"
34#include <stdio.h>
35
36/* Number of tokens to read ahead of the current stream position */
37#define MAX_LOOKAHEAD 3
38
/* Code points of the characters that receive special treatment while a
 * rules file is scanned (see parseUCARules). */
39#define CR 0x000D
40#define LF 0x000A
41#define SPACE 0x0020
42#define TAB 0x0009
43#define ESCAPE 0x005C
44#define HASH 0x0023
45#define QUOTE 0x0027
46#define ZERO 0x0030
/* STARTCOMMAND/ENDCOMMAND carry the same values as OPENSQBRACKET/CLOSESQBRACKET. */
47#define STARTCOMMAND 0x005B
48#define ENDCOMMAND 0x005D
49#define OPENSQBRACKET 0x005B
50#define CLOSESQBRACKET 0x005D
51
/* Common signature of the per-type parse functions in this file (parseString,
 * parseTable, parseArray, ...): tag is the resource key or NULL, startline the
 * line used in diagnostics, comment the comment handed to the new resource. */
52typedef struct SResource *
53ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
54
/* One slot of the circular token lookahead buffer: the token type together
 * with its text, its comment and the line number reported by getNextToken. */
55struct Lookahead
56{
57 enum ETokenType type;
58 struct UString value;
59 struct UString comment;
60 uint32_t line;
61};
62
63/* keep in sync with token defines in read.h */
/* Printable token names, indexed by enum ETokenType; used in the
 * "expecting %s, got %s" and "unexpected token %s" diagnostics. */
64const char *tokenNames[TOK_TOKEN_COUNT] =
65{
66 "string", /* A string token, such as "MonthNames" */
67 "'{'", /* An opening brace character */
68 "'}'", /* A closing brace character */
69 "','", /* A comma */
70 "':'", /* A colon */
71
72 "<end of file>", /* End of the file has been reached successfully */
73 "<end of line>"
74};
75
76/* Just to store "TRUE" */
77static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
78
/* Circular token buffer, index of the current slot, and the input the
 * tokens are read from (see initLookahead/getToken/peekToken). */
79static struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
80static uint32_t lookaheadPosition;
81static UCHARBUF *buffer;
82
/* Bundle that every resource created by the parse functions is opened against. */
83static struct SRBRoot *bundle;
/* Directory prefixes (and their lengths) prepended to file names referenced
 * from the input: inputdir by parseUCARules/parseTransliterator, outputdir
 * by parseDependency. Either pointer may be NULL. */
84static const char *inputdir;
85static uint32_t inputdirLength;
86static const char *outputdir;
87static uint32_t outputdirLength;
88
/* When TRUE, addCollation also builds the "%%CollationBin" binary for a "Sequence". */
89static UBool gMakeBinaryCollation = TRUE;
90
/* Forward declaration: the table/array parsers recurse through parseResource. */
91static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
92
93/* The nature of the lookahead buffer:
94 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
95 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
96 When getToken is called, the current pointer is moved to the next slot and the
97 old slot is filled with the next token from the reader by calling getNextToken.
98 The token values are stored in the slot, which means that token values don't
99 survive a call to getToken, ie.
100
101 UString *value;
102
103 getToken(&value, NULL, status);
104 getToken(NULL, NULL, status); bad - value is now a different string
105*/
106static void
107initLookahead(UCHARBUF *buf, UErrorCode *status)
108{
109 static uint32_t initTypeStrings = 0;
110 uint32_t i;
111
112 if (!initTypeStrings)
113 {
114 initTypeStrings = 1;
115 }
116
117 lookaheadPosition = 0;
118 buffer = buf;
119
120 resetLineNumber();
121
122 for (i = 0; i < MAX_LOOKAHEAD; i++)
123 {
124 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
125 if (U_FAILURE(*status))
126 {
127 return;
128 }
129 }
130
131 *status = U_ZERO_ERROR;
132}
133
134static enum ETokenType
135getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
136{
137 enum ETokenType result;
138 uint32_t i;
139
140 result = lookahead[lookaheadPosition].type;
141
142 if (tokenValue != NULL)
143 {
144 *tokenValue = &lookahead[lookaheadPosition].value;
145 }
146
147 if (linenumber != NULL)
148 {
149 *linenumber = lookahead[lookaheadPosition].line;
150 }
151
152 if (comment != NULL)
153 {
154 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
155 }
156
157 i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
158 lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
159 ustr_setlen(&lookahead[i].comment, 0, status);
160 ustr_setlen(&lookahead[i].value, 0, status);
161 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
162
163 /* printf("getToken, returning %s\n", tokenNames[result]); */
164
165 return result;
166}
167
168static enum ETokenType
169peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
170{
171 uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
172
173 if (U_FAILURE(*status))
174 {
175 return TOK_ERROR;
176 }
177
178 if (lookaheadCount >= MAX_LOOKAHEAD)
179 {
180 *status = U_INTERNAL_PROGRAM_ERROR;
181 return TOK_ERROR;
182 }
183
184 if (tokenValue != NULL)
185 {
186 *tokenValue = &lookahead[i].value;
187 }
188
189 if (linenumber != NULL)
190 {
191 *linenumber = lookahead[i].line;
192 }
193
194 if(comment != NULL){
195 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
196 }
197
198 return lookahead[i].type;
199}
200
201static void
202expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
203{
204 uint32_t line;
205
206 enum ETokenType token = getToken(tokenValue, comment, &line, status);
207
208 if (linenumber != NULL)
209 {
210 *linenumber = line;
211 }
212
213 if (U_FAILURE(*status))
214 {
215 return;
216 }
217
218 if (token != expectedToken)
219 {
220 *status = U_INVALID_FORMAT_ERROR;
221 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
222 }
223 else /* "else" is added by Jing/GCL */
224 {
225 *status = U_ZERO_ERROR;
226 }
227}
228
229static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status)
230{
231 struct UString *tokenValue;
232 char *result;
233 uint32_t count;
234
235 expect(TOK_STRING, &tokenValue, comment, line, status);
236
237 if (U_FAILURE(*status))
238 {
239 return NULL;
240 }
241
242 count = u_strlen(tokenValue->fChars);
243 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
244 *status = U_INVALID_FORMAT_ERROR;
245 error(*line, "invariant characters required for table keys, binary data, etc.");
246 return NULL;
247 }
248
249 result = uprv_malloc(count+1);
250
251 if (result == NULL)
252 {
253 *status = U_MEMORY_ALLOCATION_ERROR;
254 return NULL;
255 }
256
257 u_UCharsToChars(tokenValue->fChars, result, count+1);
258 return result;
259}
260
261static struct SResource *
262parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
263{
264 struct SResource *result = NULL;
265 struct UString *tokenValue;
266 FileStream *file = NULL;
267 char filename[256] = { '\0' };
268 char cs[128] = { '\0' };
269 uint32_t line;
270 int len=0;
271 UBool quoted = FALSE;
272 UCHARBUF *ucbuf=NULL;
273 UChar32 c = 0;
274 const char* cp = NULL;
275 UChar *pTarget = NULL;
276 UChar *target = NULL;
277 UChar *targetLimit = NULL;
278 int32_t size = 0;
279
280 expect(TOK_STRING, &tokenValue, NULL, &line, status);
281
282 if(isVerbose()){
283 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
284 }
285
286 if (U_FAILURE(*status))
287 {
288 return NULL;
289 }
290 /* make the filename including the directory */
291 if (inputdir != NULL)
292 {
293 uprv_strcat(filename, inputdir);
294
295 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
296 {
297 uprv_strcat(filename, U_FILE_SEP_STRING);
298 }
299 }
300
301 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
302
303 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
304
305 if (U_FAILURE(*status))
306 {
307 return NULL;
308 }
309 uprv_strcat(filename, cs);
310
311
312 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
313
314 if (U_FAILURE(*status)) {
315 error(line, "An error occured while opening the input file %s\n", filename);
316 return NULL;
317 }
318
319 /* We allocate more space than actually required
320 * since the actual size needed for storing UChars
321 * is not known in UTF-8 byte stream
322 */
323 size = ucbuf_size(ucbuf) + 1;
324 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
325 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
326 target = pTarget;
327 targetLimit = pTarget+size;
328
329 /* read the rules into the buffer */
330 while (target < targetLimit)
331 {
332 c = ucbuf_getc(ucbuf, status);
333 if(c == QUOTE) {
334 quoted = (UBool)!quoted;
335 }
336 /* weiv (06/26/2002): adding the following:
337 * - preserving spaces in commands [...]
338 * - # comments until the end of line
339 */
340 if (c == STARTCOMMAND && !quoted)
341 {
342 /* preserve commands
343 * closing bracket will be handled by the
344 * append at the end of the loop
345 */
346 while(c != ENDCOMMAND) {
347 U_APPEND_CHAR32(c, target,len);
348 c = ucbuf_getc(ucbuf, status);
349 }
350 }
351 else if (c == HASH && !quoted) {
352 /* skip comments */
353 while(c != CR && c != LF) {
354 c = ucbuf_getc(ucbuf, status);
355 }
356 continue;
357 }
358 else if (c == ESCAPE)
359 {
360 c = unescape(ucbuf, status);
361
362 if (c == U_ERR)
363 {
364 uprv_free(pTarget);
365 T_FileStream_close(file);
366 return NULL;
367 }
368 }
369 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
370 {
371 /* ignore spaces carriage returns
372 * and line feed unless in the form \uXXXX
373 */
374 continue;
375 }
376
377 /* Append UChar * after dissembling if c > 0xffff*/
378 if (c != U_EOF)
379 {
380 U_APPEND_CHAR32(c, target,len);
381 }
382 else
383 {
384 break;
385 }
386 }
387
388 /* terminate the string */
389 if(target < targetLimit){
390 *target = 0x0000;
391 }
392
393 result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
394
395
396 ucbuf_close(ucbuf);
397 uprv_free(pTarget);
398 T_FileStream_close(file);
399
400 return result;
401}
402
403static struct SResource *
404parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
405{
406 struct SResource *result = NULL;
407 struct UString *tokenValue;
408 FileStream *file = NULL;
409 char filename[256] = { '\0' };
410 char cs[128] = { '\0' };
411 uint32_t line;
412 UCHARBUF *ucbuf=NULL;
413 const char* cp = NULL;
414 UChar *pTarget = NULL;
415 const UChar *pSource = NULL;
416 int32_t size = 0;
417
418 expect(TOK_STRING, &tokenValue, NULL, &line, status);
419
420 if(isVerbose()){
421 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
422 }
423
424 if (U_FAILURE(*status))
425 {
426 return NULL;
427 }
428 /* make the filename including the directory */
429 if (inputdir != NULL)
430 {
431 uprv_strcat(filename, inputdir);
432
433 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
434 {
435 uprv_strcat(filename, U_FILE_SEP_STRING);
436 }
437 }
438
439 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
440
441 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
442
443 if (U_FAILURE(*status))
444 {
445 return NULL;
446 }
447 uprv_strcat(filename, cs);
448
449
450 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
451
452 if (U_FAILURE(*status)) {
453 error(line, "An error occured while opening the input file %s\n", filename);
454 return NULL;
455 }
456
457 /* We allocate more space than actually required
458 * since the actual size needed for storing UChars
459 * is not known in UTF-8 byte stream
460 */
461 pSource = ucbuf_getBuffer(ucbuf, &size, status);
462 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
463 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
464
465#if !UCONFIG_NO_TRANSLITERATION
466 size = utrans_stripRules(pSource, size, pTarget, status);
467#else
468 size = 0;
469 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
470#endif
471 result = string_open(bundle, tag, pTarget, size, NULL, status);
472
473 ucbuf_close(ucbuf);
474 uprv_free(pTarget);
475 T_FileStream_close(file);
476
477 return result;
478}
479static struct SResource* dependencyArray = NULL;
480
481static struct SResource *
482parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
483{
484 struct SResource *result = NULL;
485 struct SResource *elem = NULL;
486 struct UString *tokenValue;
487 uint32_t line;
488 char filename[256] = { '\0' };
489 char cs[128] = { '\0' };
490
491 expect(TOK_STRING, &tokenValue, NULL, &line, status);
492
493 if(isVerbose()){
494 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
495 }
496
497 if (U_FAILURE(*status))
498 {
499 return NULL;
500 }
501 /* make the filename including the directory */
502 if (outputdir != NULL)
503 {
504 uprv_strcat(filename, outputdir);
505
506 if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR)
507 {
508 uprv_strcat(filename, U_FILE_SEP_STRING);
509 }
510 }
511
512 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
513
514 if (U_FAILURE(*status))
515 {
516 return NULL;
517 }
518 uprv_strcat(filename, cs);
519 if(!T_FileStream_file_exists(filename)){
520 if(isStrict()){
521 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
522 }else{
523 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
524 }
525 }
526 if(dependencyArray==NULL){
527 dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status);
528 }
529 if(tag!=NULL){
530 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
531 }
532 elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
533
534 array_add(dependencyArray, elem, status);
535
536 if (U_FAILURE(*status))
537 {
538 return NULL;
539 }
540 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
541 return result;
542}
543static struct SResource *
544parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
545{
546 struct UString *tokenValue;
547 struct SResource *result = NULL;
548
549/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
550 {
551 return parseUCARules(tag, startline, status);
552 }*/
553 if(isVerbose()){
554 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
555 }
556 expect(TOK_STRING, &tokenValue, NULL, NULL, status);
557
558 if (U_SUCCESS(*status))
559 {
560 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
561 doesn't survive expect either) */
562
563 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
564 if(U_SUCCESS(*status) && result) {
565 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
566
567 if (U_FAILURE(*status))
568 {
569 string_close(result, status);
570 return NULL;
571 }
572 }
573 }
574
575 return result;
576}
577
578static struct SResource *
579parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
580{
581 struct UString *tokenValue;
582 struct SResource *result = NULL;
583
584 expect(TOK_STRING, &tokenValue, NULL, NULL, status);
585
586 if(isVerbose()){
587 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
588 }
589
590 if (U_SUCCESS(*status))
591 {
592 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
593 doesn't survive expect either) */
594
595 result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
596
597 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
598
599 if (U_FAILURE(*status))
600 {
601 alias_close(result, status);
602 return NULL;
603 }
604 }
605
606 return result;
607}
608
609static struct SResource *
610addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
611{
612 struct SResource *member = NULL;
613 struct UString *tokenValue;
614 struct UString comment;
615 enum ETokenType token;
616 char subtag[1024];
617 UVersionInfo version;
618 UBool override = FALSE;
619 uint32_t line;
620 /* '{' . (name resource)* '}' */
621 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
622
623 for (;;)
624 {
625 ustr_init(&comment);
626 token = getToken(&tokenValue, &comment, &line, status);
627
628 if (token == TOK_CLOSE_BRACE)
629 {
630 return result;
631 }
632
633 if (token != TOK_STRING)
634 {
635 table_close(result, status);
636 *status = U_INVALID_FORMAT_ERROR;
637
638 if (token == TOK_EOF)
639 {
640 error(startline, "unterminated table");
641 }
642 else
643 {
644 error(line, "Unexpected token %s", tokenNames[token]);
645 }
646
647 return NULL;
648 }
649
650 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
651
652 if (U_FAILURE(*status))
653 {
654 table_close(result, status);
655 return NULL;
656 }
657
658 member = parseResource(subtag, NULL, status);
659
660 if (U_FAILURE(*status))
661 {
662 table_close(result, status);
663 return NULL;
664 }
665
666 if (uprv_strcmp(subtag, "Version") == 0)
667 {
668 char ver[40];
669 int32_t length = member->u.fString.fLength;
670
671 if (length >= (int32_t) sizeof(ver))
672 {
673 length = (int32_t) sizeof(ver) - 1;
674 }
675
676 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
677 u_versionFromString(version, ver);
678
679 table_add(result, member, line, status);
680
681 }
682 else if (uprv_strcmp(subtag, "Override") == 0)
683 {
684 override = FALSE;
685
686 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
687 {
688 override = TRUE;
689 }
690 table_add(result, member, line, status);
691
692 }
693 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
694 {
695 /* discard duplicate %%CollationBin if any*/
696 }
697 else if (uprv_strcmp(subtag, "Sequence") == 0)
698 {
699#if UCONFIG_NO_COLLATION
700 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
701#else
702 /* first we add the "Sequence", so that we always have rules */
703 table_add(result, member, line, status);
704 if(gMakeBinaryCollation) {
705 UErrorCode intStatus = U_ZERO_ERROR;
706
707 /* do the collation elements */
708 int32_t len = 0;
709 uint8_t *data = NULL;
710 UCollator *coll = NULL;
711 UParseError parseError;
712 /* add sequence */
713 /*table_add(result, member, line, status);*/
714
715 coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
716 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
717
718 if (U_SUCCESS(intStatus) && coll != NULL)
719 {
720 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
721 data = (uint8_t *)uprv_malloc(len);
722 intStatus = U_ZERO_ERROR;
723 len = ucol_cloneBinary(coll, data, len, &intStatus);
724 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
725
726 /* tailoring rules version */
727 /* This is wrong! */
728 /*coll->dataInfo.dataVersion[1] = version[0];*/
729 /* Copy tailoring version. Builder version already */
730 /* set in ucol_openRules */
731 ((UCATableHeader *)data)->version[1] = version[0];
732 ((UCATableHeader *)data)->version[2] = version[1];
733 ((UCATableHeader *)data)->version[3] = version[2];
734
735 if (U_SUCCESS(intStatus) && data != NULL)
736 {
737 member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
738 /*table_add(bundle->fRoot, member, line, status);*/
739 table_add(result, member, line, status);
740 uprv_free(data);
741 }
742 else
743 {
744 warning(line, "could not obtain rules from collator");
745 if(isStrict()){
746 *status = U_INVALID_FORMAT_ERROR;
747 return NULL;
748 }
749 }
750
751 ucol_close(coll);
752 }
753 else
754 {
755 warning(line, "%%Collation could not be constructed from CollationElements - check context!");
756 if(isStrict()){
757 *status = intStatus;
758 return NULL;
759 }
760 }
761 } else {
762 if(isVerbose()) {
763 printf("Not building Collation binary\n");
764 }
765 }
766#endif
767 }
768
769 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
770
771 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
772
773 if (U_FAILURE(*status))
774 {
775 table_close(result, status);
776 return NULL;
777 }
778 }
779
780 /* not reached */
781 /* A compiler warning will appear if all paths don't contain a return statement. */
782/* *status = U_INTERNAL_PROGRAM_ERROR;
783 return NULL;*/
784}
785
786static struct SResource *
787parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
788{
789 struct SResource *result = NULL;
790 struct SResource *member = NULL;
791 struct SResource *collationRes = NULL;
792 struct UString *tokenValue;
793 struct UString comment;
794 enum ETokenType token;
795 char subtag[1024], typeKeyword[1024];
796 uint32_t line;
797
798 result = table_open(bundle, tag, NULL, status);
799
800 if (result == NULL || U_FAILURE(*status))
801 {
802 return NULL;
803 }
804 if(isVerbose()){
805 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
806 }
807 if(!newCollation) {
808 return addCollation(result, startline, status);
809 }
810 else {
811 for(;;) {
812 ustr_init(&comment);
813 token = getToken(&tokenValue, &comment, &line, status);
814
815 if (token == TOK_CLOSE_BRACE)
816 {
817 return result;
818 }
819
820 if (token != TOK_STRING)
821 {
822 table_close(result, status);
823 *status = U_INVALID_FORMAT_ERROR;
824
825 if (token == TOK_EOF)
826 {
827 error(startline, "unterminated table");
828 }
829 else
830 {
831 error(line, "Unexpected token %s", tokenNames[token]);
832 }
833
834 return NULL;
835 }
836
837 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
838
839 if (U_FAILURE(*status))
840 {
841 table_close(result, status);
842 return NULL;
843 }
844
845 if (uprv_strcmp(subtag, "default") == 0)
846 {
847 member = parseResource(subtag, NULL, status);
848
849 if (U_FAILURE(*status))
850 {
851 table_close(result, status);
852 return NULL;
853 }
854
855 table_add(result, member, line, status);
856 }
857 else
858 {
859 token = peekToken(0, &tokenValue, &line, &comment, status);
860 /* this probably needs to be refactored or recursively use the parser */
861 /* first we assume that our collation table won't have the explicit type */
862 /* then, we cannot handle aliases */
863 if(token == TOK_OPEN_BRACE) {
864 token = getToken(&tokenValue, &comment, &line, status);
865 collationRes = table_open(bundle, subtag, NULL, status);
866 table_add(result, addCollation(collationRes, startline, status), startline, status);
867 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
868 /* we could have a table too */
869 token = peekToken(1, &tokenValue, &line, &comment, status);
870 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
871 if(uprv_strcmp(typeKeyword, "alias") == 0) {
872 member = parseResource(subtag, NULL, status);
873
874 if (U_FAILURE(*status))
875 {
876 table_close(result, status);
877 return NULL;
878 }
879
880 table_add(result, member, line, status);
881 } else {
882 *status = U_INVALID_FORMAT_ERROR;
883 return NULL;
884 }
885 } else {
886 *status = U_INVALID_FORMAT_ERROR;
887 return NULL;
888 }
889 }
890
891 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
892
893 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
894
895 if (U_FAILURE(*status))
896 {
897 table_close(result, status);
898 return NULL;
899 }
900 }
901 }
902}
903
904/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
905 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
906static struct SResource *
907realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
908{
909 struct SResource *member = NULL;
910 struct UString *tokenValue=NULL;
911 struct UString comment;
912 enum ETokenType token;
913 char subtag[1024];
914 uint32_t line;
915 UBool readToken = FALSE;
916
917 /* '{' . (name resource)* '}' */
918 if(isVerbose()){
919 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
920 }
921 for (;;)
922 {
923 ustr_init(&comment);
924 token = getToken(&tokenValue, &comment, &line, status);
925
926 if (token == TOK_CLOSE_BRACE)
927 {
928 if (!readToken) {
929 warning(startline, "Encountered empty table");
930 }
931 return table;
932 }
933
934 if (token != TOK_STRING)
935 {
936 table_close(table, status);
937 *status = U_INVALID_FORMAT_ERROR;
938
939 if (token == TOK_EOF)
940 {
941 error(startline, "unterminated table");
942 }
943 else
944 {
945 error(line, "unexpected token %s", tokenNames[token]);
946 }
947
948 return NULL;
949 }
950
951 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
952 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
953 } else {
954 *status = U_INVALID_FORMAT_ERROR;
955 error(line, "invariant characters required for table keys");
956 table_close(table, status);
957 return NULL;
958 }
959
960 if (U_FAILURE(*status))
961 {
962 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
963 table_close(table, status);
964 return NULL;
965 }
966
967 member = parseResource(subtag, &comment, status);
968
969 if (member == NULL || U_FAILURE(*status))
970 {
971 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
972 table_close(table, status);
973 return NULL;
974 }
975
976 table_add(table, member, line, status);
977
978 if (U_FAILURE(*status))
979 {
980 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
981 table_close(table, status);
982 return NULL;
983 }
984 readToken = TRUE;
985 }
986
987 /* not reached */
988 /* A compiler warning will appear if all paths don't contain a return statement. */
989/* *status = U_INTERNAL_PROGRAM_ERROR;
990 return NULL;*/
991}
992
993static struct SResource *
994parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
995{
996 struct SResource *result;
997
998 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
999 {
1000 return parseCollationElements(tag, startline, FALSE, status);
1001 }
1002 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1003 {
1004 return parseCollationElements(tag, startline, TRUE, status);
1005 }
1006 if(isVerbose()){
1007 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1008 }
1009
1010 result = table_open(bundle, tag, comment, status);
1011
1012 if (result == NULL || U_FAILURE(*status))
1013 {
1014 return NULL;
1015 }
1016
1017 return realParseTable(result, tag, startline, status);
1018}
1019
1020static struct SResource *
1021parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1022{
1023 struct SResource *result = NULL;
1024 struct SResource *member = NULL;
1025 struct UString *tokenValue;
1026 struct UString memberComments;
1027 enum ETokenType token;
1028 UBool readToken = FALSE;
1029
1030 result = array_open(bundle, tag, comment, status);
1031
1032 if (result == NULL || U_FAILURE(*status))
1033 {
1034 return NULL;
1035 }
1036 if(isVerbose()){
1037 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1038 }
1039
1040 ustr_init(&memberComments);
1041
1042 /* '{' . resource [','] '}' */
1043 for (;;)
1044 {
1045 /* reset length */
1046 ustr_setlen(&memberComments, 0, status);
1047
1048 /* check for end of array, but don't consume next token unless it really is the end */
1049 token = peekToken(0, &tokenValue, NULL, &memberComments, status);
1050
1051
1052 if (token == TOK_CLOSE_BRACE)
1053 {
1054 getToken(NULL, NULL, NULL, status);
1055 if (!readToken) {
1056 warning(startline, "Encountered empty array");
1057 }
1058 break;
1059 }
1060
1061 if (token == TOK_EOF)
1062 {
1063 array_close(result, status);
1064 *status = U_INVALID_FORMAT_ERROR;
1065 error(startline, "unterminated array");
1066 return NULL;
1067 }
1068
1069 /* string arrays are a special case */
1070 if (token == TOK_STRING)
1071 {
1072 getToken(&tokenValue, &memberComments, NULL, status);
1073 member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1074 }
1075 else
1076 {
1077 member = parseResource(NULL, &memberComments, status);
1078 }
1079
1080 if (member == NULL || U_FAILURE(*status))
1081 {
1082 array_close(result, status);
1083 return NULL;
1084 }
1085
1086 array_add(result, member, status);
1087
1088 if (U_FAILURE(*status))
1089 {
1090 array_close(result, status);
1091 return NULL;
1092 }
1093
1094 /* eat optional comma if present */
1095 token = peekToken(0, NULL, NULL, NULL, status);
1096
1097 if (token == TOK_COMMA)
1098 {
1099 getToken(NULL, NULL, NULL, status);
1100 }
1101
1102 if (U_FAILURE(*status))
1103 {
1104 array_close(result, status);
1105 return NULL;
1106 }
1107 readToken = TRUE;
1108 }
1109
1110 return result;
1111}
1112
1113static struct SResource *
1114parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1115{
1116 struct SResource *result = NULL;
1117 enum ETokenType token;
1118 char *string;
1119 int32_t value;
1120 UBool readToken = FALSE;
1121 /* added by Jing/GCL */
1122 char *stopstring;
1123 uint32_t len;
1124 struct UString memberComments;
1125
1126 result = intvector_open(bundle, tag, comment, status);
1127
1128 if (result == NULL || U_FAILURE(*status))
1129 {
1130 return NULL;
1131 }
1132
1133 if(isVerbose()){
1134 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1135 }
1136 ustr_init(&memberComments);
1137 /* '{' . string [','] '}' */
1138 for (;;)
1139 {
1140 ustr_setlen(&memberComments, 0, status);
1141
1142 /* check for end of array, but don't consume next token unless it really is the end */
1143 token = peekToken(0, NULL, NULL,&memberComments, status);
1144
1145 if (token == TOK_CLOSE_BRACE)
1146 {
1147 /* it's the end, consume the close brace */
1148 getToken(NULL, NULL, NULL, status);
1149 if (!readToken) {
1150 warning(startline, "Encountered empty int vector");
1151 }
1152 return result;
1153 }
1154
1155 string = getInvariantString(NULL, NULL, status);
1156
1157 if (U_FAILURE(*status))
1158 {
1159 intvector_close(result, status);
1160 return NULL;
1161 }
1162 /* Commented by Jing/GCL */
1163 /*value = uprv_strtol(string, NULL, 10);
1164 intvector_add(result, value, status);
1165
1166 uprv_free(string);
1167
1168 token = peekToken(0, NULL, NULL, status);*/
1169
1170 /* The following is added by Jing/GCL to handle illegal char in the Intvector */
1171 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1172 len=(uint32_t)(stopstring-string);
1173
1174 if(len==uprv_strlen(string))
1175 {
1176 intvector_add(result, value, status);
1177 uprv_free(string);
1178 token = peekToken(0, NULL, NULL, NULL, status);
1179 }
1180 else
1181 {
1182 uprv_free(string);
1183 *status=U_INVALID_CHAR_FOUND;
1184 }
1185 /* The above is added by Jing/GCL */
1186
1187 if (U_FAILURE(*status))
1188 {
1189 intvector_close(result, status);
1190 return NULL;
1191 }
1192
1193 /* the comma is optional (even though it is required to prevent the reader from concatenating
1194 consecutive entries) so that a missing comma on the last entry isn't an error */
1195 if (token == TOK_COMMA)
1196 {
1197 getToken(NULL, NULL, NULL, status);
1198 }
1199 readToken = TRUE;
1200 }
1201
1202 /* not reached */
1203 /* A compiler warning will appear if all paths don't contain a return statement. */
1204/* intvector_close(result, status);
1205 *status = U_INTERNAL_PROGRAM_ERROR;
1206 return NULL;*/
1207}
1208
1209static struct SResource *
1210parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1211{
1212 struct SResource *result = NULL;
1213 uint8_t *value;
1214 char *string;
1215 char toConv[3] = {'\0', '\0', '\0'};
1216 uint32_t count;
1217 uint32_t i;
1218 uint32_t line;
1219 /* added by Jing/GCL */
1220 char *stopstring;
1221 uint32_t len;
1222
1223 string = getInvariantString(&line, NULL, status);
1224
1225 if (string == NULL || U_FAILURE(*status))
1226 {
1227 return NULL;
1228 }
1229
1230 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1231
1232 if (U_FAILURE(*status))
1233 {
1234 uprv_free(string);
1235 return NULL;
1236 }
1237
1238 if(isVerbose()){
1239 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1240 }
1241
1242 count = (uint32_t)uprv_strlen(string);
1243 if (count > 0){
1244 if((count % 2)==0){
1245 value = uprv_malloc(sizeof(uint8_t) * count);
1246
1247 if (value == NULL)
1248 {
1249 uprv_free(string);
1250 *status = U_MEMORY_ALLOCATION_ERROR;
1251 return NULL;
1252 }
1253
1254 for (i = 0; i < count; i += 2)
1255 {
1256 toConv[0] = string[i];
1257 toConv[1] = string[i + 1];
1258
1259 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1260 len=(uint32_t)(stopstring-toConv);
1261
1262 if(len!=uprv_strlen(toConv))
1263 {
1264 uprv_free(string);
1265 *status=U_INVALID_CHAR_FOUND;
1266 return NULL;
1267 }
1268 }
1269
1270 result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status);
1271
1272 uprv_free(value);
1273 }
1274 else
1275 {
1276 *status = U_INVALID_CHAR_FOUND;
1277 uprv_free(string);
1278 error(line, "Encountered invalid binary string");
1279 return NULL;
1280 }
1281 }
1282 else
1283 {
1284 result = bin_open(bundle, tag, 0, NULL, "",comment,status);
1285 warning(startline, "Encountered empty binary tag");
1286 }
1287 uprv_free(string);
1288
1289 return result;
1290}
1291
1292static struct SResource *
1293parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1294{
1295 struct SResource *result = NULL;
1296 int32_t value;
1297 char *string;
1298 /* added by Jing/GCL */
1299 char *stopstring;
1300 uint32_t len;
1301
1302 string = getInvariantString(NULL, NULL, status);
1303
1304 if (string == NULL || U_FAILURE(*status))
1305 {
1306 return NULL;
1307 }
1308
1309 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1310
1311 if (U_FAILURE(*status))
1312 {
1313 uprv_free(string);
1314 return NULL;
1315 }
1316
1317 if(isVerbose()){
1318 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1319 }
1320
1321 if (uprv_strlen(string) <= 0)
1322 {
1323 warning(startline, "Encountered empty integer. Default value is 0.");
1324 }
1325
1326 /* commented by Jing/GCL */
1327 /* value = uprv_strtol(string, NULL, 10);*/
1328 /* result = int_open(bundle, tag, value, status);*/
1329 /* The following is added by Jing/GCL*/
1330 /* to make integer support hexdecimal, octal digit and decimal*/
1331 /* to handle illegal char in the integer*/
1332 value = uprv_strtoul(string, &stopstring, 0);
1333 len=(uint32_t)(stopstring-string);
1334 if(len==uprv_strlen(string))
1335 {
1336 result = int_open(bundle, tag, value, comment, status);
1337 }
1338 else
1339 {
1340 *status=U_INVALID_CHAR_FOUND;
1341 }
1342 uprv_free(string);
1343
1344 return result;
1345}
1346
1347static struct SResource *
1348parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1349{
1350 struct SResource *result;
1351 FileStream *file;
1352 int32_t len;
1353 uint8_t *data;
1354 char *filename;
1355 uint32_t line;
1356 char *fullname = NULL;
1357 int32_t numRead = 0;
1358 filename = getInvariantString(&line, NULL, status);
1359
1360 if (U_FAILURE(*status))
1361 {
1362 return NULL;
1363 }
1364
1365 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1366
1367 if (U_FAILURE(*status))
1368 {
1369 uprv_free(filename);
1370 return NULL;
1371 }
1372
1373 if(isVerbose()){
1374 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1375 }
1376
1377 /* Open the input file for reading */
1378 if (inputdir == NULL)
1379 {
1380 file = T_FileStream_open(filename, "rb");
1381 }
1382 else
1383 {
1384
1385 int32_t count = (int32_t)uprv_strlen(filename);
1386
1387 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1388 {
1389 fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1390
1391 /* test for NULL */
1392 if(fullname == NULL)
1393 {
1394 *status = U_MEMORY_ALLOCATION_ERROR;
1395 return NULL;
1396 }
1397
1398 uprv_strcpy(fullname, inputdir);
1399
1400 fullname[inputdirLength] = U_FILE_SEP_CHAR;
1401 fullname[inputdirLength + 1] = '\0';
1402
1403 uprv_strcat(fullname, filename);
1404 }
1405 else
1406 {
1407 fullname = (char *) uprv_malloc(inputdirLength + count + 1);
1408
1409 /* test for NULL */
1410 if(fullname == NULL)
1411 {
1412 *status = U_MEMORY_ALLOCATION_ERROR;
1413 return NULL;
1414 }
1415
1416 uprv_strcpy(fullname, inputdir);
1417 uprv_strcat(fullname, filename);
1418 }
1419
1420 file = T_FileStream_open(fullname, "rb");
1421
1422 }
1423
1424 if (file == NULL)
1425 {
1426 error(line, "couldn't open input file %s", filename);
1427 *status = U_FILE_ACCESS_ERROR;
1428 return NULL;
1429 }
1430
1431 len = T_FileStream_size(file);
1432 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1433 /* test for NULL */
1434 if(data == NULL)
1435 {
1436 *status = U_MEMORY_ALLOCATION_ERROR;
1437 T_FileStream_close (file);
1438 return NULL;
1439 }
1440
1441 numRead = T_FileStream_read (file, data, len);
1442 T_FileStream_close (file);
1443
1444 result = bin_open(bundle, tag, len, data, fullname, comment, status);
1445
1446 uprv_free(data);
1447 uprv_free(filename);
1448 uprv_free(fullname);
1449
1450 return result;
1451}
1452
1453static struct SResource *
1454parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1455{
1456 struct SResource *result;
1457 int32_t len=0;
1458 char *filename;
1459 uint32_t line;
1460 UChar *pTarget = NULL;
1461
1462 UCHARBUF *ucbuf;
1463 char *fullname = NULL;
1464 int32_t count = 0;
1465 const char* cp = NULL;
1466 const UChar* uBuffer = NULL;
1467
1468 filename = getInvariantString(&line, NULL, status);
1469 count = (int32_t)uprv_strlen(filename);
1470
1471 if (U_FAILURE(*status))
1472 {
1473 return NULL;
1474 }
1475
1476 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1477
1478 if (U_FAILURE(*status))
1479 {
1480 uprv_free(filename);
1481 return NULL;
1482 }
1483
1484 if(isVerbose()){
1485 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1486 }
1487
1488 fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1489 /* test for NULL */
1490 if(fullname == NULL)
1491 {
1492 *status = U_MEMORY_ALLOCATION_ERROR;
1493 uprv_free(filename);
1494 return NULL;
1495 }
1496
1497 if(inputdir!=NULL){
1498 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1499 {
1500
1501 uprv_strcpy(fullname, inputdir);
1502
1503 fullname[inputdirLength] = U_FILE_SEP_CHAR;
1504 fullname[inputdirLength + 1] = '\0';
1505
1506 uprv_strcat(fullname, filename);
1507 }
1508 else
1509 {
1510 uprv_strcpy(fullname, inputdir);
1511 uprv_strcat(fullname, filename);
1512 }
1513 }else{
1514 uprv_strcpy(fullname,filename);
1515 }
1516
1517 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1518
1519 if (U_FAILURE(*status)) {
1520 error(line, "couldn't open input file %s\n", filename);
1521 return NULL;
1522 }
1523
1524 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1525 result = string_open(bundle, tag, uBuffer, len, comment, status);
1526
1527 uprv_free(pTarget);
1528
1529 uprv_free(filename);
1530 uprv_free(fullname);
1531
1532 return result;
1533}
1534
1535
1536
1537
1538
1539U_STRING_DECL(k_type_string, "string", 6);
1540U_STRING_DECL(k_type_binary, "binary", 6);
1541U_STRING_DECL(k_type_bin, "bin", 3);
1542U_STRING_DECL(k_type_table, "table", 5);
1543U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1544U_STRING_DECL(k_type_int, "int", 3);
1545U_STRING_DECL(k_type_integer, "integer", 7);
1546U_STRING_DECL(k_type_array, "array", 5);
1547U_STRING_DECL(k_type_alias, "alias", 5);
1548U_STRING_DECL(k_type_intvector, "intvector", 9);
1549U_STRING_DECL(k_type_import, "import", 6);
1550U_STRING_DECL(k_type_include, "include", 7);
1551U_STRING_DECL(k_type_reserved, "reserved", 8);
1552
1553/* Various non-standard processing plugins that create one or more special resources. */
1554U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1555U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1556U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1557U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1558
/*
 * Resource types known to the parser.  The values are used as indexes into
 * gResourceTypes[], so the order of the enumerators must match the order of
 * the entries in that table.
 */
typedef enum EResourceType
{
    RT_UNKNOWN,                 /* no explicit type given (or not recognized) */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* only accepted for the top-level bundle */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker for the type-name search */
} EResourceType;
1578
1579static struct {
1580 const char *nameChars; /* only used for debugging */
1581 const UChar *nameUChars;
1582 ParseResourceFunction *parseFunction;
1583} gResourceTypes[] = {
1584 {"Unknown", NULL, NULL},
1585 {"string", k_type_string, parseString},
1586 {"binary", k_type_binary, parseBinary},
1587 {"table", k_type_table, parseTable},
1588 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1589 {"integer", k_type_integer, parseInteger},
1590 {"array", k_type_array, parseArray},
1591 {"alias", k_type_alias, parseAlias},
1592 {"intvector", k_type_intvector, parseIntVector},
1593 {"import", k_type_import, parseImport},
1594 {"include", k_type_include, parseInclude},
1595 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1596 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1597 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1598 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1599 {"reserved", NULL, NULL}
1600};
1601
1602void initParser(UBool makeBinaryCollation)
1603{
1604 uint32_t i;
1605
1606 U_STRING_INIT(k_type_string, "string", 6);
1607 U_STRING_INIT(k_type_binary, "binary", 6);
1608 U_STRING_INIT(k_type_bin, "bin", 3);
1609 U_STRING_INIT(k_type_table, "table", 5);
1610 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1611 U_STRING_INIT(k_type_int, "int", 3);
1612 U_STRING_INIT(k_type_integer, "integer", 7);
1613 U_STRING_INIT(k_type_array, "array", 5);
1614 U_STRING_INIT(k_type_alias, "alias", 5);
1615 U_STRING_INIT(k_type_intvector, "intvector", 9);
1616 U_STRING_INIT(k_type_import, "import", 6);
1617 U_STRING_INIT(k_type_reserved, "reserved", 8);
1618 U_STRING_INIT(k_type_include, "include", 7);
1619
1620 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1621 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1622 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1623 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1624
1625 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1626 {
1627 ustr_init(&lookahead[i].value);
1628 }
1629 gMakeBinaryCollation = makeBinaryCollation;
1630}
1631
1632static U_INLINE UBool isTable(enum EResourceType type) {
1633 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1634}
1635
1636static enum EResourceType
1637parseResourceType(UErrorCode *status)
1638{
1639 struct UString *tokenValue;
1640 struct UString comment;
1641 enum EResourceType result = RT_UNKNOWN;
1642 uint32_t line=0;
1643 ustr_init(&comment);
1644 expect(TOK_STRING, &tokenValue, &comment, &line, status);
1645
1646 if (U_FAILURE(*status))
1647 {
1648 return RT_UNKNOWN;
1649 }
1650
1651 *status = U_ZERO_ERROR;
1652
1653 /* Search for normal types */
1654 result=RT_UNKNOWN;
1655 while (++result < RT_RESERVED) {
1656 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1657 break;
1658 }
1659 }
1660 /* Now search for the aliases */
1661 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1662 result = RT_INTEGER;
1663 }
1664 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1665 result = RT_BINARY;
1666 }
1667 else if (result == RT_RESERVED) {
1668 char tokenBuffer[1024];
1669 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1670 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1671 *status = U_INVALID_FORMAT_ERROR;
1672 error(line, "unknown resource type '%s'", tokenBuffer);
1673 }
1674
1675 return result;
1676}
1677
1678/* parse a non-top-level resource */
1679static struct SResource *
1680parseResource(char *tag, const struct UString *comment, UErrorCode *status)
1681{
1682 enum ETokenType token;
1683 enum EResourceType resType = RT_UNKNOWN;
1684 ParseResourceFunction *parseFunction = NULL;
1685 struct UString *tokenValue;
1686 uint32_t startline;
1687 uint32_t line;
1688
1689 token = getToken(&tokenValue, NULL, &startline, status);
1690
1691 if(isVerbose()){
1692 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1693 }
1694
1695 /* name . [ ':' type ] '{' resource '}' */
1696 /* This function parses from the colon onwards. If the colon is present, parse the
1697 type then try to parse a resource of that type. If there is no explicit type,
1698 work it out using the lookahead tokens. */
1699 switch (token)
1700 {
1701 case TOK_EOF:
1702 *status = U_INVALID_FORMAT_ERROR;
1703 error(startline, "Unexpected EOF encountered");
1704 return NULL;
1705
1706 case TOK_ERROR:
1707 *status = U_INVALID_FORMAT_ERROR;
1708 return NULL;
1709
1710 case TOK_COLON:
1711 resType = parseResourceType(status);
1712 expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1713
1714 if (U_FAILURE(*status))
1715 {
1716 return NULL;
1717 }
1718
1719 break;
1720
1721 case TOK_OPEN_BRACE:
1722 break;
1723
1724 default:
1725 *status = U_INVALID_FORMAT_ERROR;
1726 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1727 return NULL;
1728 }
1729
1730 if (resType == RT_UNKNOWN)
1731 {
1732 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1733 We could have any of the following:
1734 { { => array (nested)
1735 { :/} => array
1736 { string , => string array
1737
1738 commented by Jing/GCL
1739 { string { => table
1740
1741 added by Jing/GCL
1742
1743 { string :/{ => table
1744 { string } => string
1745 */
1746
1747 token = peekToken(0, NULL, &line, NULL,status);
1748
1749 if (U_FAILURE(*status))
1750 {
1751 return NULL;
1752 }
1753
1754 /* Commented by Jing/GCL */
1755 /* if (token == TOK_OPEN_BRACE || token == TOK_COLON )*/
1756 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1757 {
1758 resType = RT_ARRAY;
1759 }
1760 else if (token == TOK_STRING)
1761 {
1762 token = peekToken(1, NULL, &line, NULL, status);
1763
1764 if (U_FAILURE(*status))
1765 {
1766 return NULL;
1767 }
1768
1769 switch (token)
1770 {
1771 case TOK_COMMA: resType = RT_ARRAY; break;
1772 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
1773 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
1774 /* added by Jing/GCL to make table work when :table is omitted */
1775 case TOK_COLON: resType = RT_TABLE; break;
1776 default:
1777 *status = U_INVALID_FORMAT_ERROR;
1778 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1779 return NULL;
1780 }
1781 }
1782 else
1783 {
1784 *status = U_INVALID_FORMAT_ERROR;
1785 error(line, "Unexpected token after '{'");
1786 return NULL;
1787 }
1788
1789 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1790 } else if(resType == RT_TABLE_NO_FALLBACK) {
1791 *status = U_INVALID_FORMAT_ERROR;
1792 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1793 return NULL;
1794 }
1795
1796 /* We should now know what we need to parse next, so call the appropriate parser
1797 function and return. */
1798 parseFunction = gResourceTypes[resType].parseFunction;
1799 if (parseFunction != NULL) {
1800 return parseFunction(tag, startline, comment, status);
1801 }
1802 else {
1803 *status = U_INTERNAL_PROGRAM_ERROR;
1804 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1805 }
1806
1807 return NULL;
1808}
1809
1810/* parse the top-level resource */
1811struct SRBRoot *
1812parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
1813{
1814 struct UString *tokenValue;
1815 struct UString comment;
1816 uint32_t line;
1817 /* added by Jing/GCL */
1818 enum EResourceType bundleType;
1819 enum ETokenType token;
1820
1821 initLookahead(buf, status);
1822
1823 inputdir = inputDir;
1824 inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0;
1825 outputdir = outputDir;
1826 outputdirLength = (outputdir != NULL) ? (uint32_t)uprv_strlen(outputdir) : 0;
1827
1828 ustr_init(&comment);
1829 expect(TOK_STRING, &tokenValue, &comment, NULL, status);
1830
1831 bundle = bundle_open(&comment, status);
1832
1833 if (bundle == NULL || U_FAILURE(*status))
1834 {
1835 return NULL;
1836 }
1837
1838
1839 bundle_setlocale(bundle, tokenValue->fChars, status);
1840 /* Commented by Jing/GCL */
1841 /* expect(TOK_OPEN_BRACE, NULL, &line, status); */
1842 /* The following code is to make Empty bundle work no matter with :table specifer or not */
1843 token = getToken(NULL, NULL, &line, status);
1844 if(token==TOK_COLON) {
1845 *status=U_ZERO_ERROR;
1846 bundleType=parseResourceType(status);
1847
1848 if(isTable(bundleType))
1849 {
1850 expect(TOK_OPEN_BRACE, NULL, NULL, &line, status);
1851 }
1852 else
1853 {
1854 *status=U_PARSE_ERROR;
1855 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
1856 }
1857 }
1858 else
1859 {
1860 /* not a colon */
1861 if(token==TOK_OPEN_BRACE)
1862 {
1863 *status=U_ZERO_ERROR;
1864 bundleType=RT_TABLE;
1865 }
1866 else
1867 {
1868 /* neither colon nor open brace */
1869 *status=U_PARSE_ERROR;
1870 bundleType=RT_UNKNOWN;
1871 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
1872 }
1873 }
1874 /* The above is added by Jing/GCL */
1875
1876 if (U_FAILURE(*status))
1877 {
1878 bundle_close(bundle, status);
1879 return NULL;
1880 }
1881
1882 if(bundleType==RT_TABLE_NO_FALLBACK) {
1883 /*
1884 * Parse a top-level table with the table(nofallback) declaration.
1885 * This is the same as a regular table, but also sets the
1886 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
1887 */
1888 bundle->noFallback=TRUE;
1889 }
1890 /* top-level tables need not handle special table names like "collations" */
1891 realParseTable(bundle->fRoot, NULL, line, status);
1892
1893 if(dependencyArray!=NULL){
1894 table_add(bundle->fRoot, dependencyArray, 0, status);
1895 dependencyArray = NULL;
1896 }
1897 if (U_FAILURE(*status))
1898 {
1899 bundle_close(bundle, status);
1900 array_close(dependencyArray, status);
1901 return NULL;
1902 }
1903
1904 if (getToken(NULL, NULL, &line, status) != TOK_EOF)
1905 {
1906 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
1907 if(isStrict()){
1908 *status = U_INVALID_FORMAT_ERROR;
1909 return NULL;
1910 }
1911 }
1912
1913 return bundle;
1914}