]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/parse.cpp
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "genrb.h"
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
36 #include <stdio.h>
37
38 /* Number of tokens to read ahead of the current stream position */
39 #define MAX_LOOKAHEAD 3
40
41 #define CR 0x000D
42 #define LF 0x000A
43 #define SPACE 0x0020
44 #define TAB 0x0009
45 #define ESCAPE 0x005C
46 #define HASH 0x0023
47 #define QUOTE 0x0027
48 #define ZERO 0x0030
49 #define STARTCOMMAND 0x005B
50 #define ENDCOMMAND 0x005D
51 #define OPENSQBRACKET 0x005B
52 #define CLOSESQBRACKET 0x005D
53
54 struct Lookahead
55 {
56 enum ETokenType type;
57 struct UString value;
58 struct UString comment;
59 uint32_t line;
60 };
61
62 /* keep in sync with token defines in read.h */
63 const char *tokenNames[TOK_TOKEN_COUNT] =
64 {
65 "string", /* A string token, such as "MonthNames" */
66 "'{'", /* An opening brace character */
67 "'}'", /* A closing brace character */
68 "','", /* A comma */
69 "':'", /* A colon */
70
71 "<end of file>", /* End of the file has been reached successfully */
72 "<end of line>"
73 };
74
75 /* Just to store "TRUE" */
76 static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
77
78 typedef struct {
79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
80 uint32_t lookaheadPosition;
81 UCHARBUF *buffer;
82 struct SRBRoot *bundle;
83 const char *inputdir;
84 uint32_t inputdirLength;
85 const char *outputdir;
86 uint32_t outputdirLength;
87 UBool makeBinaryCollation;
88 } ParseState;
89
90 static UBool gOmitCollationRules = FALSE;
91
92 typedef struct SResource *
93 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
94
95 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
96
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
110 static void
111 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
112 {
113 static uint32_t initTypeStrings = 0;
114 uint32_t i;
115
116 if (!initTypeStrings)
117 {
118 initTypeStrings = 1;
119 }
120
121 state->lookaheadPosition = 0;
122 state->buffer = buf;
123
124 resetLineNumber();
125
126 for (i = 0; i < MAX_LOOKAHEAD; i++)
127 {
128 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
129 if (U_FAILURE(*status))
130 {
131 return;
132 }
133 }
134
135 *status = U_ZERO_ERROR;
136 }
137
138 static void
139 cleanupLookahead(ParseState* state)
140 {
141 uint32_t i;
142 for (i = 0; i < MAX_LOOKAHEAD; i++)
143 {
144 ustr_deinit(&state->lookahead[i].value);
145 ustr_deinit(&state->lookahead[i].comment);
146 }
147
148 }
149
150 static enum ETokenType
151 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
152 {
153 enum ETokenType result;
154 uint32_t i;
155
156 result = state->lookahead[state->lookaheadPosition].type;
157
158 if (tokenValue != NULL)
159 {
160 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
161 }
162
163 if (linenumber != NULL)
164 {
165 *linenumber = state->lookahead[state->lookaheadPosition].line;
166 }
167
168 if (comment != NULL)
169 {
170 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
171 }
172
173 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
174 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
175 ustr_setlen(&state->lookahead[i].comment, 0, status);
176 ustr_setlen(&state->lookahead[i].value, 0, status);
177 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
178
179 /* printf("getToken, returning %s\n", tokenNames[result]); */
180
181 return result;
182 }
183
184 static enum ETokenType
185 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
186 {
187 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
188
189 if (U_FAILURE(*status))
190 {
191 return TOK_ERROR;
192 }
193
194 if (lookaheadCount >= MAX_LOOKAHEAD)
195 {
196 *status = U_INTERNAL_PROGRAM_ERROR;
197 return TOK_ERROR;
198 }
199
200 if (tokenValue != NULL)
201 {
202 *tokenValue = &state->lookahead[i].value;
203 }
204
205 if (linenumber != NULL)
206 {
207 *linenumber = state->lookahead[i].line;
208 }
209
210 if(comment != NULL){
211 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
212 }
213
214 return state->lookahead[i].type;
215 }
216
217 static void
218 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
219 {
220 uint32_t line;
221
222 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
223
224 if (linenumber != NULL)
225 {
226 *linenumber = line;
227 }
228
229 if (U_FAILURE(*status))
230 {
231 return;
232 }
233
234 if (token != expectedToken)
235 {
236 *status = U_INVALID_FORMAT_ERROR;
237 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
238 }
239 else
240 {
241 *status = U_ZERO_ERROR;
242 }
243 }
244
245 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
246 {
247 struct UString *tokenValue;
248 char *result;
249 uint32_t count;
250
251 expect(state, TOK_STRING, &tokenValue, comment, line, status);
252
253 if (U_FAILURE(*status))
254 {
255 return NULL;
256 }
257
258 count = u_strlen(tokenValue->fChars);
259 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
260 *status = U_INVALID_FORMAT_ERROR;
261 error(*line, "invariant characters required for table keys, binary data, etc.");
262 return NULL;
263 }
264
265 result = reinterpret_cast<char *>(uprv_malloc(count+1));
266
267 if (result == NULL)
268 {
269 *status = U_MEMORY_ALLOCATION_ERROR;
270 return NULL;
271 }
272
273 u_UCharsToChars(tokenValue->fChars, result, count+1);
274 return result;
275 }
276
277 static struct SResource *
278 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
279 {
280 struct SResource *result = NULL;
281 struct UString *tokenValue;
282 FileStream *file = NULL;
283 char filename[256] = { '\0' };
284 char cs[128] = { '\0' };
285 uint32_t line;
286 int len=0;
287 UBool quoted = FALSE;
288 UCHARBUF *ucbuf=NULL;
289 UChar32 c = 0;
290 const char* cp = NULL;
291 UChar *pTarget = NULL;
292 UChar *target = NULL;
293 UChar *targetLimit = NULL;
294 int32_t size = 0;
295
296 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
297
298 if(isVerbose()){
299 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
300 }
301
302 if (U_FAILURE(*status))
303 {
304 return NULL;
305 }
306 /* make the filename including the directory */
307 if (state->inputdir != NULL)
308 {
309 uprv_strcat(filename, state->inputdir);
310
311 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
312 {
313 uprv_strcat(filename, U_FILE_SEP_STRING);
314 }
315 }
316
317 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
318
319 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
320
321 if (U_FAILURE(*status))
322 {
323 return NULL;
324 }
325 uprv_strcat(filename, cs);
326
327 if(gOmitCollationRules) {
328 return res_none();
329 }
330
331 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
332
333 if (U_FAILURE(*status)) {
334 error(line, "An error occured while opening the input file %s\n", filename);
335 return NULL;
336 }
337
338 /* We allocate more space than actually required
339 * since the actual size needed for storing UChars
340 * is not known in UTF-8 byte stream
341 */
342 size = ucbuf_size(ucbuf) + 1;
343 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
344 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
345 target = pTarget;
346 targetLimit = pTarget+size;
347
348 /* read the rules into the buffer */
349 while (target < targetLimit)
350 {
351 c = ucbuf_getc(ucbuf, status);
352 if(c == QUOTE) {
353 quoted = (UBool)!quoted;
354 }
355 /* weiv (06/26/2002): adding the following:
356 * - preserving spaces in commands [...]
357 * - # comments until the end of line
358 */
359 if (c == STARTCOMMAND && !quoted)
360 {
361 /* preserve commands
362 * closing bracket will be handled by the
363 * append at the end of the loop
364 */
365 while(c != ENDCOMMAND) {
366 U_APPEND_CHAR32(c, target,len);
367 c = ucbuf_getc(ucbuf, status);
368 }
369 }
370 else if (c == HASH && !quoted) {
371 /* skip comments */
372 while(c != CR && c != LF) {
373 c = ucbuf_getc(ucbuf, status);
374 }
375 continue;
376 }
377 else if (c == ESCAPE)
378 {
379 c = unescape(ucbuf, status);
380
381 if (c == (UChar32)U_ERR)
382 {
383 uprv_free(pTarget);
384 T_FileStream_close(file);
385 return NULL;
386 }
387 }
388 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
389 {
390 /* ignore spaces carriage returns
391 * and line feed unless in the form \uXXXX
392 */
393 continue;
394 }
395
396 /* Append UChar * after dissembling if c > 0xffff*/
397 if (c != (UChar32)U_EOF)
398 {
399 U_APPEND_CHAR32(c, target,len);
400 }
401 else
402 {
403 break;
404 }
405 }
406
407 /* terminate the string */
408 if(target < targetLimit){
409 *target = 0x0000;
410 }
411
412 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
413
414
415 ucbuf_close(ucbuf);
416 uprv_free(pTarget);
417 T_FileStream_close(file);
418
419 return result;
420 }
421
422 static struct SResource *
423 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
424 {
425 struct SResource *result = NULL;
426 struct UString *tokenValue;
427 FileStream *file = NULL;
428 char filename[256] = { '\0' };
429 char cs[128] = { '\0' };
430 uint32_t line;
431 UCHARBUF *ucbuf=NULL;
432 const char* cp = NULL;
433 UChar *pTarget = NULL;
434 const UChar *pSource = NULL;
435 int32_t size = 0;
436
437 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
438
439 if(isVerbose()){
440 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
441 }
442
443 if (U_FAILURE(*status))
444 {
445 return NULL;
446 }
447 /* make the filename including the directory */
448 if (state->inputdir != NULL)
449 {
450 uprv_strcat(filename, state->inputdir);
451
452 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
453 {
454 uprv_strcat(filename, U_FILE_SEP_STRING);
455 }
456 }
457
458 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
459
460 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
461
462 if (U_FAILURE(*status))
463 {
464 return NULL;
465 }
466 uprv_strcat(filename, cs);
467
468
469 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
470
471 if (U_FAILURE(*status)) {
472 error(line, "An error occured while opening the input file %s\n", filename);
473 return NULL;
474 }
475
476 /* We allocate more space than actually required
477 * since the actual size needed for storing UChars
478 * is not known in UTF-8 byte stream
479 */
480 pSource = ucbuf_getBuffer(ucbuf, &size, status);
481 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
482 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
483
484 #if !UCONFIG_NO_TRANSLITERATION
485 size = utrans_stripRules(pSource, size, pTarget, status);
486 #else
487 size = 0;
488 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
489 #endif
490 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
491
492 ucbuf_close(ucbuf);
493 uprv_free(pTarget);
494 T_FileStream_close(file);
495
496 return result;
497 }
498 static struct SResource* dependencyArray = NULL;
499
500 static struct SResource *
501 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
502 {
503 struct SResource *result = NULL;
504 struct SResource *elem = NULL;
505 struct UString *tokenValue;
506 uint32_t line;
507 char filename[256] = { '\0' };
508 char cs[128] = { '\0' };
509
510 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
511
512 if(isVerbose()){
513 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
514 }
515
516 if (U_FAILURE(*status))
517 {
518 return NULL;
519 }
520 /* make the filename including the directory */
521 if (state->outputdir != NULL)
522 {
523 uprv_strcat(filename, state->outputdir);
524
525 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
526 {
527 uprv_strcat(filename, U_FILE_SEP_STRING);
528 }
529 }
530
531 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
532
533 if (U_FAILURE(*status))
534 {
535 return NULL;
536 }
537 uprv_strcat(filename, cs);
538 if(!T_FileStream_file_exists(filename)){
539 if(isStrict()){
540 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
541 }else{
542 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
543 }
544 }
545 if(dependencyArray==NULL){
546 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
547 }
548 if(tag!=NULL){
549 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
550 }
551 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
552
553 array_add(dependencyArray, elem, status);
554
555 if (U_FAILURE(*status))
556 {
557 return NULL;
558 }
559 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
560 return result;
561 }
562 static struct SResource *
563 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
564 {
565 struct UString *tokenValue;
566 struct SResource *result = NULL;
567
568 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
569 {
570 return parseUCARules(tag, startline, status);
571 }*/
572 if(isVerbose()){
573 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
574 }
575 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
576
577 if (U_SUCCESS(*status))
578 {
579 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
580 doesn't survive expect either) */
581
582 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
583 if(U_SUCCESS(*status) && result) {
584 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
585
586 if (U_FAILURE(*status))
587 {
588 res_close(result);
589 return NULL;
590 }
591 }
592 }
593
594 return result;
595 }
596
597 static struct SResource *
598 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
599 {
600 struct UString *tokenValue;
601 struct SResource *result = NULL;
602
603 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
604
605 if(isVerbose()){
606 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
607 }
608
609 if (U_SUCCESS(*status))
610 {
611 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
612 doesn't survive expect either) */
613
614 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
615
616 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
617
618 if (U_FAILURE(*status))
619 {
620 res_close(result);
621 return NULL;
622 }
623 }
624
625 return result;
626 }
627
/*
 * Context handed to importFromDataFile(): the directories that are passed on
 * to parse() when an imported locale file is read.
 */
typedef struct {
    const char *inputDir;   /* directory of the source files (may be NULL) */
    const char *outputDir;  /* output directory, passed through to parse() */
} GenrbData;
632
633 static struct SResource* resLookup(struct SResource* res, const char* key){
634 struct SResource *current = NULL;
635 struct SResTable *list;
636 if (res == res_none()) {
637 return NULL;
638 }
639
640 list = &(res->u.fTable);
641
642 current = list->fFirst;
643 while (current != NULL) {
644 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
645 return current;
646 }
647 current = current->fNext;
648 }
649 return NULL;
650 }
651
652 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
653 struct SRBRoot *data = NULL;
654 UCHARBUF *ucbuf = NULL;
655 GenrbData* genrbdata = (GenrbData*) context;
656 int localeLength = strlen(locale);
657 char* filename = (char*)uprv_malloc(localeLength+5);
658 char *inputDirBuf = NULL;
659 char *openFileName = NULL;
660 const char* cp = "";
661 UChar* urules = NULL;
662 int32_t urulesLength = 0;
663 int32_t i = 0;
664 int32_t dirlen = 0;
665 int32_t filelen = 0;
666 struct SResource* root;
667 struct SResource* collations;
668 struct SResource* collation;
669 struct SResource* sequence;
670
671 memcpy(filename, locale, localeLength);
672 for(i = 0; i < localeLength; i++){
673 if(filename[i] == '-'){
674 filename[i] = '_';
675 }
676 }
677 filename[localeLength] = '.';
678 filename[localeLength+1] = 't';
679 filename[localeLength+2] = 'x';
680 filename[localeLength+3] = 't';
681 filename[localeLength+4] = 0;
682
683
684 if (status==NULL || U_FAILURE(*status)) {
685 return NULL;
686 }
687 if(filename==NULL){
688 *status=U_ILLEGAL_ARGUMENT_ERROR;
689 return NULL;
690 }else{
691 filelen = (int32_t)uprv_strlen(filename);
692 }
693 if(genrbdata->inputDir == NULL) {
694 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
695 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
696 openFileName[0] = '\0';
697 if (filenameBegin != NULL) {
698 /*
699 * When a filename ../../../data/root.txt is specified,
700 * we presume that the input directory is ../../../data
701 * This is very important when the resource file includes
702 * another file, like UCARules.txt or thaidict.brk.
703 */
704 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
705 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
706
707 /* test for NULL */
708 if(inputDirBuf == NULL) {
709 *status = U_MEMORY_ALLOCATION_ERROR;
710 goto finish;
711 }
712
713 inputDirBuf[filenameSize - 1] = 0;
714 genrbdata->inputDir = inputDirBuf;
715 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
716 }
717 }else{
718 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
719
720 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
721 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
722
723 /* test for NULL */
724 if(openFileName == NULL) {
725 *status = U_MEMORY_ALLOCATION_ERROR;
726 goto finish;
727 }
728
729 openFileName[0] = '\0';
730 /*
731 * append the input dir to openFileName if the first char in
732 * filename is not file seperation char and the last char input directory is not '.'.
733 * This is to support :
734 * genrb -s. /home/icu/data
735 * genrb -s. icu/data
736 * The user cannot mix notations like
737 * genrb -s. /icu/data --- the absolute path specified. -s redundant
738 * user should use
739 * genrb -s. icu/data --- start from CWD and look in icu/data dir
740 */
741 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
742 uprv_strcpy(openFileName, genrbdata->inputDir);
743 openFileName[dirlen] = U_FILE_SEP_CHAR;
744 }
745 openFileName[dirlen + 1] = '\0';
746 } else {
747 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
748
749 /* test for NULL */
750 if(openFileName == NULL) {
751 *status = U_MEMORY_ALLOCATION_ERROR;
752 goto finish;
753 }
754
755 uprv_strcpy(openFileName, genrbdata->inputDir);
756
757 }
758 }
759 uprv_strcat(openFileName, filename);
760 /* printf("%s\n", openFileName); */
761 *status = U_ZERO_ERROR;
762 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
763
764 if(*status == U_FILE_ACCESS_ERROR) {
765
766 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
767 goto finish;
768 }
769 if (ucbuf == NULL || U_FAILURE(*status)) {
770 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
771 goto finish;
772 }
773
774 /* Parse the data into an SRBRoot */
775 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status);
776
777 root = data->fRoot;
778 collations = resLookup(root, "collations");
779 if (collations != NULL) {
780 collation = resLookup(collations, type);
781 if (collation != NULL) {
782 sequence = resLookup(collation, "Sequence");
783 if (sequence != NULL) {
784 urules = sequence->u.fString.fChars;
785 urulesLength = sequence->u.fString.fLength;
786 *pLength = urulesLength;
787 }
788 }
789 }
790
791 finish:
792 if (inputDirBuf != NULL) {
793 uprv_free(inputDirBuf);
794 }
795
796 if (openFileName != NULL) {
797 uprv_free(openFileName);
798 }
799
800 if(ucbuf) {
801 ucbuf_close(ucbuf);
802 }
803
804 return urules;
805 }
806
807 // Quick-and-dirty escaping function.
808 // Assumes that we are on an ASCII-based platform.
809 static void
810 escape(const UChar *s, char *buffer) {
811 int32_t length = u_strlen(s);
812 int32_t i = 0;
813 for (;;) {
814 UChar32 c;
815 U16_NEXT(s, i, length, c);
816 if (c == 0) {
817 *buffer = 0;
818 return;
819 } else if (0x20 <= c && c <= 0x7e) {
820 // printable ASCII
821 *buffer++ = (char)c; // assumes ASCII-based platform
822 } else {
823 buffer += sprintf(buffer, "\\u%04X", (int)c);
824 }
825 }
826 }
827
828 static struct SResource *
829 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
830 {
831 struct SResource *member = NULL;
832 struct UString *tokenValue;
833 struct UString comment;
834 enum ETokenType token;
835 char subtag[1024];
836 UVersionInfo version;
837 uint32_t line;
838 GenrbData genrbdata;
839 /* '{' . (name resource)* '}' */
840 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
841
842 for (;;)
843 {
844 ustr_init(&comment);
845 token = getToken(state, &tokenValue, &comment, &line, status);
846
847 if (token == TOK_CLOSE_BRACE)
848 {
849 return result;
850 }
851
852 if (token != TOK_STRING)
853 {
854 res_close(result);
855 *status = U_INVALID_FORMAT_ERROR;
856
857 if (token == TOK_EOF)
858 {
859 error(startline, "unterminated table");
860 }
861 else
862 {
863 error(line, "Unexpected token %s", tokenNames[token]);
864 }
865
866 return NULL;
867 }
868
869 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
870
871 if (U_FAILURE(*status))
872 {
873 res_close(result);
874 return NULL;
875 }
876
877 member = parseResource(state, subtag, NULL, status);
878
879 if (U_FAILURE(*status))
880 {
881 res_close(result);
882 return NULL;
883 }
884
885 if (uprv_strcmp(subtag, "Version") == 0)
886 {
887 char ver[40];
888 int32_t length = member->u.fString.fLength;
889
890 if (length >= (int32_t) sizeof(ver))
891 {
892 length = (int32_t) sizeof(ver) - 1;
893 }
894
895 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
896 u_versionFromString(version, ver);
897
898 table_add(result, member, line, status);
899
900 }
901 else if (uprv_strcmp(subtag, "Override") == 0)
902 {
903 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
904 table_add(result, member, line, status);
905
906 }
907 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
908 {
909 /* discard duplicate %%CollationBin if any*/
910 }
911 else if (uprv_strcmp(subtag, "Sequence") == 0)
912 {
913 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
914 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
915 #else
916 if(state->makeBinaryCollation) {
917
918 /* do the collation elements */
919 int32_t len = 0;
920 uint8_t *data = NULL;
921 UCollator *coll = NULL;
922 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
923 int32_t reorderCodeCount;
924 int32_t reorderCodeIndex;
925 UParseError parseError;
926
927 genrbdata.inputDir = state->inputdir;
928 genrbdata.outputDir = state->outputdir;
929
930 UErrorCode intStatus = U_ZERO_ERROR;
931 uprv_memset(&parseError, 0, sizeof(parseError));
932 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
933 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
934
935 if (U_SUCCESS(intStatus) && coll != NULL)
936 {
937 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
938 data = (uint8_t *)uprv_malloc(len);
939 intStatus = U_ZERO_ERROR;
940 len = ucol_cloneBinary(coll, data, len, &intStatus);
941 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
942
943 /* tailoring rules version */
944 /* This is wrong! */
945 /*coll->dataInfo.dataVersion[1] = version[0];*/
946 /* Copy tailoring version. Builder version already */
947 /* set in ucol_openRules */
948 ((UCATableHeader *)data)->version[1] = version[0];
949 ((UCATableHeader *)data)->version[2] = version[1];
950 ((UCATableHeader *)data)->version[3] = version[2];
951
952 if (U_SUCCESS(intStatus) && data != NULL)
953 {
954 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
955 table_add(result, collationBin, line, status);
956 uprv_free(data);
957
958 reorderCodeCount = ucol_getReorderCodes(
959 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
960 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
961 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
962 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
963 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
964 }
965 table_add(result, reorderCodeRes, line, status);
966 }
967 }
968 else
969 {
970 warning(line, "could not obtain rules from collator");
971 if(isStrict()){
972 *status = U_INVALID_FORMAT_ERROR;
973 return NULL;
974 }
975 }
976
977 ucol_close(coll);
978 }
979 else
980 {
981 if(intStatus == U_FILE_ACCESS_ERROR) {
982 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
983 *status = intStatus;
984 return NULL;
985 }
986 char preBuffer[100], postBuffer[100];
987 escape(parseError.preContext, preBuffer);
988 escape(parseError.postContext, postBuffer);
989 warning(line,
990 "%%%%CollationBin could not be constructed from CollationElements\n"
991 " check context, check that the FractionalUCA.txt UCA version "
992 "matches the current UCD version\n"
993 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
994 u_errorName(intStatus),
995 parseError.line,
996 parseError.offset,
997 preBuffer,
998 postBuffer);
999 if(isStrict()){
1000 *status = intStatus;
1001 return NULL;
1002 }
1003 }
1004 } else {
1005 if(isVerbose()) {
1006 printf("Not building Collation binary\n");
1007 }
1008 }
1009 #endif
1010 /* in order to achieve smaller data files, we can direct genrb */
1011 /* to omit collation rules */
1012 if(gOmitCollationRules) {
1013 bundle_closeString(state->bundle, member);
1014 } else {
1015 table_add(result, member, line, status);
1016 }
1017 }
1018 if (U_FAILURE(*status))
1019 {
1020 res_close(result);
1021 return NULL;
1022 }
1023 }
1024
1025 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1026 *status = U_INTERNAL_PROGRAM_ERROR;
1027 return NULL;
1028 }
1029
1030 static struct SResource *
1031 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1032 {
1033 struct SResource *result = NULL;
1034 struct SResource *member = NULL;
1035 struct SResource *collationRes = NULL;
1036 struct UString *tokenValue;
1037 struct UString comment;
1038 enum ETokenType token;
1039 char subtag[1024], typeKeyword[1024];
1040 uint32_t line;
1041
1042 result = table_open(state->bundle, tag, NULL, status);
1043
1044 if (result == NULL || U_FAILURE(*status))
1045 {
1046 return NULL;
1047 }
1048 if(isVerbose()){
1049 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1050 }
1051 if(!newCollation) {
1052 return addCollation(state, result, startline, status);
1053 }
1054 else {
1055 for(;;) {
1056 ustr_init(&comment);
1057 token = getToken(state, &tokenValue, &comment, &line, status);
1058
1059 if (token == TOK_CLOSE_BRACE)
1060 {
1061 return result;
1062 }
1063
1064 if (token != TOK_STRING)
1065 {
1066 res_close(result);
1067 *status = U_INVALID_FORMAT_ERROR;
1068
1069 if (token == TOK_EOF)
1070 {
1071 error(startline, "unterminated table");
1072 }
1073 else
1074 {
1075 error(line, "Unexpected token %s", tokenNames[token]);
1076 }
1077
1078 return NULL;
1079 }
1080
1081 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1082
1083 if (U_FAILURE(*status))
1084 {
1085 res_close(result);
1086 return NULL;
1087 }
1088
1089 if (uprv_strcmp(subtag, "default") == 0)
1090 {
1091 member = parseResource(state, subtag, NULL, status);
1092
1093 if (U_FAILURE(*status))
1094 {
1095 res_close(result);
1096 return NULL;
1097 }
1098
1099 table_add(result, member, line, status);
1100 }
1101 else
1102 {
1103 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1104 /* this probably needs to be refactored or recursively use the parser */
1105 /* first we assume that our collation table won't have the explicit type */
1106 /* then, we cannot handle aliases */
1107 if(token == TOK_OPEN_BRACE) {
1108 token = getToken(state, &tokenValue, &comment, &line, status);
1109 collationRes = table_open(state->bundle, subtag, NULL, status);
1110 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1111 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1112 table_add(result, collationRes, startline, status);
1113 }
1114 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1115 /* we could have a table too */
1116 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1117 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1118 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1119 member = parseResource(state, subtag, NULL, status);
1120 if (U_FAILURE(*status))
1121 {
1122 res_close(result);
1123 return NULL;
1124 }
1125
1126 table_add(result, member, line, status);
1127 } else {
1128 res_close(result);
1129 *status = U_INVALID_FORMAT_ERROR;
1130 return NULL;
1131 }
1132 } else {
1133 res_close(result);
1134 *status = U_INVALID_FORMAT_ERROR;
1135 return NULL;
1136 }
1137 }
1138
1139 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1140
1141 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1142
1143 if (U_FAILURE(*status))
1144 {
1145 res_close(result);
1146 return NULL;
1147 }
1148 }
1149 }
1150 }
1151
1152 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1153 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1154 static struct SResource *
1155 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1156 {
1157 struct SResource *member = NULL;
1158 struct UString *tokenValue=NULL;
1159 struct UString comment;
1160 enum ETokenType token;
1161 char subtag[1024];
1162 uint32_t line;
1163 UBool readToken = FALSE;
1164
1165 /* '{' . (name resource)* '}' */
1166
1167 if(isVerbose()){
1168 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1169 }
1170 for (;;)
1171 {
1172 ustr_init(&comment);
1173 token = getToken(state, &tokenValue, &comment, &line, status);
1174
1175 if (token == TOK_CLOSE_BRACE)
1176 {
1177 if (!readToken) {
1178 warning(startline, "Encountered empty table");
1179 }
1180 return table;
1181 }
1182
1183 if (token != TOK_STRING)
1184 {
1185 *status = U_INVALID_FORMAT_ERROR;
1186
1187 if (token == TOK_EOF)
1188 {
1189 error(startline, "unterminated table");
1190 }
1191 else
1192 {
1193 error(line, "unexpected token %s", tokenNames[token]);
1194 }
1195
1196 return NULL;
1197 }
1198
1199 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1200 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1201 } else {
1202 *status = U_INVALID_FORMAT_ERROR;
1203 error(line, "invariant characters required for table keys");
1204 return NULL;
1205 }
1206
1207 if (U_FAILURE(*status))
1208 {
1209 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1210 return NULL;
1211 }
1212
1213 member = parseResource(state, subtag, &comment, status);
1214
1215 if (member == NULL || U_FAILURE(*status))
1216 {
1217 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1218 return NULL;
1219 }
1220
1221 table_add(table, member, line, status);
1222
1223 if (U_FAILURE(*status))
1224 {
1225 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1226 return NULL;
1227 }
1228 readToken = TRUE;
1229 ustr_deinit(&comment);
1230 }
1231
1232 /* not reached */
1233 /* A compiler warning will appear if all paths don't contain a return statement. */
1234 /* *status = U_INTERNAL_PROGRAM_ERROR;
1235 return NULL;*/
1236 }
1237
1238 static struct SResource *
1239 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1240 {
1241 struct SResource *result;
1242
1243 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1244 {
1245 return parseCollationElements(state, tag, startline, FALSE, status);
1246 }
1247 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1248 {
1249 return parseCollationElements(state, tag, startline, TRUE, status);
1250 }
1251 if(isVerbose()){
1252 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1253 }
1254
1255 result = table_open(state->bundle, tag, comment, status);
1256
1257 if (result == NULL || U_FAILURE(*status))
1258 {
1259 return NULL;
1260 }
1261 return realParseTable(state, result, tag, startline, status);
1262 }
1263
1264 static struct SResource *
1265 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1266 {
1267 struct SResource *result = NULL;
1268 struct SResource *member = NULL;
1269 struct UString *tokenValue;
1270 struct UString memberComments;
1271 enum ETokenType token;
1272 UBool readToken = FALSE;
1273
1274 result = array_open(state->bundle, tag, comment, status);
1275
1276 if (result == NULL || U_FAILURE(*status))
1277 {
1278 return NULL;
1279 }
1280 if(isVerbose()){
1281 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1282 }
1283
1284 ustr_init(&memberComments);
1285
1286 /* '{' . resource [','] '}' */
1287 for (;;)
1288 {
1289 /* reset length */
1290 ustr_setlen(&memberComments, 0, status);
1291
1292 /* check for end of array, but don't consume next token unless it really is the end */
1293 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1294
1295
1296 if (token == TOK_CLOSE_BRACE)
1297 {
1298 getToken(state, NULL, NULL, NULL, status);
1299 if (!readToken) {
1300 warning(startline, "Encountered empty array");
1301 }
1302 break;
1303 }
1304
1305 if (token == TOK_EOF)
1306 {
1307 res_close(result);
1308 *status = U_INVALID_FORMAT_ERROR;
1309 error(startline, "unterminated array");
1310 return NULL;
1311 }
1312
1313 /* string arrays are a special case */
1314 if (token == TOK_STRING)
1315 {
1316 getToken(state, &tokenValue, &memberComments, NULL, status);
1317 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1318 }
1319 else
1320 {
1321 member = parseResource(state, NULL, &memberComments, status);
1322 }
1323
1324 if (member == NULL || U_FAILURE(*status))
1325 {
1326 res_close(result);
1327 return NULL;
1328 }
1329
1330 array_add(result, member, status);
1331
1332 if (U_FAILURE(*status))
1333 {
1334 res_close(result);
1335 return NULL;
1336 }
1337
1338 /* eat optional comma if present */
1339 token = peekToken(state, 0, NULL, NULL, NULL, status);
1340
1341 if (token == TOK_COMMA)
1342 {
1343 getToken(state, NULL, NULL, NULL, status);
1344 }
1345
1346 if (U_FAILURE(*status))
1347 {
1348 res_close(result);
1349 return NULL;
1350 }
1351 readToken = TRUE;
1352 }
1353
1354 ustr_deinit(&memberComments);
1355 return result;
1356 }
1357
1358 static struct SResource *
1359 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1360 {
1361 struct SResource *result = NULL;
1362 enum ETokenType token;
1363 char *string;
1364 int32_t value;
1365 UBool readToken = FALSE;
1366 char *stopstring;
1367 uint32_t len;
1368 struct UString memberComments;
1369
1370 result = intvector_open(state->bundle, tag, comment, status);
1371
1372 if (result == NULL || U_FAILURE(*status))
1373 {
1374 return NULL;
1375 }
1376
1377 if(isVerbose()){
1378 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1379 }
1380 ustr_init(&memberComments);
1381 /* '{' . string [','] '}' */
1382 for (;;)
1383 {
1384 ustr_setlen(&memberComments, 0, status);
1385
1386 /* check for end of array, but don't consume next token unless it really is the end */
1387 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1388
1389 if (token == TOK_CLOSE_BRACE)
1390 {
1391 /* it's the end, consume the close brace */
1392 getToken(state, NULL, NULL, NULL, status);
1393 if (!readToken) {
1394 warning(startline, "Encountered empty int vector");
1395 }
1396 ustr_deinit(&memberComments);
1397 return result;
1398 }
1399
1400 string = getInvariantString(state, NULL, NULL, status);
1401
1402 if (U_FAILURE(*status))
1403 {
1404 res_close(result);
1405 return NULL;
1406 }
1407
1408 /* For handling illegal char in the Intvector */
1409 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1410 len=(uint32_t)(stopstring-string);
1411
1412 if(len==uprv_strlen(string))
1413 {
1414 intvector_add(result, value, status);
1415 uprv_free(string);
1416 token = peekToken(state, 0, NULL, NULL, NULL, status);
1417 }
1418 else
1419 {
1420 uprv_free(string);
1421 *status=U_INVALID_CHAR_FOUND;
1422 }
1423
1424 if (U_FAILURE(*status))
1425 {
1426 res_close(result);
1427 return NULL;
1428 }
1429
1430 /* the comma is optional (even though it is required to prevent the reader from concatenating
1431 consecutive entries) so that a missing comma on the last entry isn't an error */
1432 if (token == TOK_COMMA)
1433 {
1434 getToken(state, NULL, NULL, NULL, status);
1435 }
1436 readToken = TRUE;
1437 }
1438
1439 /* not reached */
1440 /* A compiler warning will appear if all paths don't contain a return statement. */
1441 /* intvector_close(result, status);
1442 *status = U_INTERNAL_PROGRAM_ERROR;
1443 return NULL;*/
1444 }
1445
1446 static struct SResource *
1447 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1448 {
1449 struct SResource *result = NULL;
1450 uint8_t *value;
1451 char *string;
1452 char toConv[3] = {'\0', '\0', '\0'};
1453 uint32_t count;
1454 uint32_t i;
1455 uint32_t line;
1456 char *stopstring;
1457 uint32_t len;
1458
1459 string = getInvariantString(state, &line, NULL, status);
1460
1461 if (string == NULL || U_FAILURE(*status))
1462 {
1463 return NULL;
1464 }
1465
1466 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1467
1468 if (U_FAILURE(*status))
1469 {
1470 uprv_free(string);
1471 return NULL;
1472 }
1473
1474 if(isVerbose()){
1475 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1476 }
1477
1478 count = (uint32_t)uprv_strlen(string);
1479 if (count > 0){
1480 if((count % 2)==0){
1481 value = reinterpret_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1482
1483 if (value == NULL)
1484 {
1485 uprv_free(string);
1486 *status = U_MEMORY_ALLOCATION_ERROR;
1487 return NULL;
1488 }
1489
1490 for (i = 0; i < count; i += 2)
1491 {
1492 toConv[0] = string[i];
1493 toConv[1] = string[i + 1];
1494
1495 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1496 len=(uint32_t)(stopstring-toConv);
1497
1498 if(len!=uprv_strlen(toConv))
1499 {
1500 uprv_free(string);
1501 *status=U_INVALID_CHAR_FOUND;
1502 return NULL;
1503 }
1504 }
1505
1506 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1507
1508 uprv_free(value);
1509 }
1510 else
1511 {
1512 *status = U_INVALID_CHAR_FOUND;
1513 uprv_free(string);
1514 error(line, "Encountered invalid binary string");
1515 return NULL;
1516 }
1517 }
1518 else
1519 {
1520 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1521 warning(startline, "Encountered empty binary tag");
1522 }
1523 uprv_free(string);
1524
1525 return result;
1526 }
1527
1528 static struct SResource *
1529 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1530 {
1531 struct SResource *result = NULL;
1532 int32_t value;
1533 char *string;
1534 char *stopstring;
1535 uint32_t len;
1536
1537 string = getInvariantString(state, NULL, NULL, status);
1538
1539 if (string == NULL || U_FAILURE(*status))
1540 {
1541 return NULL;
1542 }
1543
1544 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1545
1546 if (U_FAILURE(*status))
1547 {
1548 uprv_free(string);
1549 return NULL;
1550 }
1551
1552 if(isVerbose()){
1553 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1554 }
1555
1556 if (uprv_strlen(string) <= 0)
1557 {
1558 warning(startline, "Encountered empty integer. Default value is 0.");
1559 }
1560
1561 /* Allow integer support for hexdecimal, octal digit and decimal*/
1562 /* and handle illegal char in the integer*/
1563 value = uprv_strtoul(string, &stopstring, 0);
1564 len=(uint32_t)(stopstring-string);
1565 if(len==uprv_strlen(string))
1566 {
1567 result = int_open(state->bundle, tag, value, comment, status);
1568 }
1569 else
1570 {
1571 *status=U_INVALID_CHAR_FOUND;
1572 }
1573 uprv_free(string);
1574
1575 return result;
1576 }
1577
1578 static struct SResource *
1579 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1580 {
1581 struct SResource *result;
1582 FileStream *file;
1583 int32_t len;
1584 uint8_t *data;
1585 char *filename;
1586 uint32_t line;
1587 char *fullname = NULL;
1588 filename = getInvariantString(state, &line, NULL, status);
1589
1590 if (U_FAILURE(*status))
1591 {
1592 return NULL;
1593 }
1594
1595 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1596
1597 if (U_FAILURE(*status))
1598 {
1599 uprv_free(filename);
1600 return NULL;
1601 }
1602
1603 if(isVerbose()){
1604 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1605 }
1606
1607 /* Open the input file for reading */
1608 if (state->inputdir == NULL)
1609 {
1610 #if 1
1611 /*
1612 * Always save file file name, even if there's
1613 * no input directory specified. MIGHT BREAK SOMETHING
1614 */
1615 int32_t filenameLength = uprv_strlen(filename);
1616
1617 fullname = (char *) uprv_malloc(filenameLength + 1);
1618 uprv_strcpy(fullname, filename);
1619 #endif
1620
1621 file = T_FileStream_open(filename, "rb");
1622 }
1623 else
1624 {
1625
1626 int32_t count = (int32_t)uprv_strlen(filename);
1627
1628 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1629 {
1630 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1631
1632 /* test for NULL */
1633 if(fullname == NULL)
1634 {
1635 *status = U_MEMORY_ALLOCATION_ERROR;
1636 return NULL;
1637 }
1638
1639 uprv_strcpy(fullname, state->inputdir);
1640
1641 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1642 fullname[state->inputdirLength + 1] = '\0';
1643
1644 uprv_strcat(fullname, filename);
1645 }
1646 else
1647 {
1648 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1649
1650 /* test for NULL */
1651 if(fullname == NULL)
1652 {
1653 *status = U_MEMORY_ALLOCATION_ERROR;
1654 return NULL;
1655 }
1656
1657 uprv_strcpy(fullname, state->inputdir);
1658 uprv_strcat(fullname, filename);
1659 }
1660
1661 file = T_FileStream_open(fullname, "rb");
1662
1663 }
1664
1665 if (file == NULL)
1666 {
1667 error(line, "couldn't open input file %s", filename);
1668 *status = U_FILE_ACCESS_ERROR;
1669 return NULL;
1670 }
1671
1672 len = T_FileStream_size(file);
1673 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1674 /* test for NULL */
1675 if(data == NULL)
1676 {
1677 *status = U_MEMORY_ALLOCATION_ERROR;
1678 T_FileStream_close (file);
1679 return NULL;
1680 }
1681
1682 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1683 T_FileStream_close (file);
1684
1685 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1686
1687 uprv_free(data);
1688 uprv_free(filename);
1689 uprv_free(fullname);
1690
1691 return result;
1692 }
1693
1694 static struct SResource *
1695 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1696 {
1697 struct SResource *result;
1698 int32_t len=0;
1699 char *filename;
1700 uint32_t line;
1701 UChar *pTarget = NULL;
1702
1703 UCHARBUF *ucbuf;
1704 char *fullname = NULL;
1705 int32_t count = 0;
1706 const char* cp = NULL;
1707 const UChar* uBuffer = NULL;
1708
1709 filename = getInvariantString(state, &line, NULL, status);
1710 count = (int32_t)uprv_strlen(filename);
1711
1712 if (U_FAILURE(*status))
1713 {
1714 return NULL;
1715 }
1716
1717 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1718
1719 if (U_FAILURE(*status))
1720 {
1721 uprv_free(filename);
1722 return NULL;
1723 }
1724
1725 if(isVerbose()){
1726 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1727 }
1728
1729 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1730 /* test for NULL */
1731 if(fullname == NULL)
1732 {
1733 *status = U_MEMORY_ALLOCATION_ERROR;
1734 uprv_free(filename);
1735 return NULL;
1736 }
1737
1738 if(state->inputdir!=NULL){
1739 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1740 {
1741
1742 uprv_strcpy(fullname, state->inputdir);
1743
1744 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1745 fullname[state->inputdirLength + 1] = '\0';
1746
1747 uprv_strcat(fullname, filename);
1748 }
1749 else
1750 {
1751 uprv_strcpy(fullname, state->inputdir);
1752 uprv_strcat(fullname, filename);
1753 }
1754 }else{
1755 uprv_strcpy(fullname,filename);
1756 }
1757
1758 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1759
1760 if (U_FAILURE(*status)) {
1761 error(line, "couldn't open input file %s\n", filename);
1762 return NULL;
1763 }
1764
1765 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1766 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1767
1768 uprv_free(pTarget);
1769
1770 uprv_free(filename);
1771 uprv_free(fullname);
1772
1773 return result;
1774 }
1775
1776
1777
1778
1779
/*
 * UChar string constants for the resource type keywords that may follow the
 * ':' in "name:type { ... }". Each is declared with its length here and
 * filled in by initParser() through the matching U_STRING_INIT call.
 * k_type_bin and k_type_int are matched as aliases of "binary" and "integer"
 * in parseResourceType().
 */
U_STRING_DECL(k_type_string,    "string",    6);
U_STRING_DECL(k_type_binary,    "binary",    6);
U_STRING_DECL(k_type_bin,       "bin",       3);
U_STRING_DECL(k_type_table,     "table",     5);
U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
U_STRING_DECL(k_type_int,       "int",       3);
U_STRING_DECL(k_type_integer,   "integer",   7);
U_STRING_DECL(k_type_array,     "array",     5);
U_STRING_DECL(k_type_alias,     "alias",     5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import,    "import",    6);
U_STRING_DECL(k_type_include,   "include",   7);
U_STRING_DECL(k_type_reserved,  "reserved",  8);

/* Various non-standard processing plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1799
/*
 * Resource types known to the parser.
 *
 * The values are used directly as indexes into gResourceTypes[], so the
 * order here must stay in step with that table. RT_UNKNOWN means "no explicit
 * type given"; RT_RESERVED terminates the type-name search in
 * parseResourceType() and is never a valid parsed type.
 */
typedef enum EResourceType
{
    RT_UNKNOWN,
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,   /* rejected by parseResource(); accepted by parse() for the top-level bundle */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED
} EResourceType;
1819
/*
 * Per-type dispatch table, indexed by EResourceType (one row per enum value,
 * in the same order).
 *
 * nameUChars is compared against the type keyword read from the input in
 * parseResourceType(); parseFunction is the parser that parseResource()
 * invokes for that type. A NULL parseFunction makes parseResource() report an
 * internal error for that type.
 */
static struct {
    const char *nameChars;   /* only used for debugging */
    const UChar *nameUChars;
    ParseResourceFunction *parseFunction;
} gResourceTypes[] = {
    {"Unknown", NULL, NULL},
    {"string", k_type_string, parseString},
    {"binary", k_type_binary, parseBinary},
    {"table", k_type_table, parseTable},
    {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
    {"integer", k_type_integer, parseInteger},
    {"array", k_type_array, parseArray},
    {"alias", k_type_alias, parseAlias},
    {"intvector", k_type_intvector, parseIntVector},
    {"import", k_type_import, parseImport},
    {"include", k_type_include, parseInclude},
    {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
    {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
    {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
    {"process(dependency)", k_type_plugin_dependency, parseDependency},
    {"reserved", NULL, NULL}
};
1842
1843 void initParser(UBool omitCollationRules)
1844 {
1845 U_STRING_INIT(k_type_string, "string", 6);
1846 U_STRING_INIT(k_type_binary, "binary", 6);
1847 U_STRING_INIT(k_type_bin, "bin", 3);
1848 U_STRING_INIT(k_type_table, "table", 5);
1849 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1850 U_STRING_INIT(k_type_int, "int", 3);
1851 U_STRING_INIT(k_type_integer, "integer", 7);
1852 U_STRING_INIT(k_type_array, "array", 5);
1853 U_STRING_INIT(k_type_alias, "alias", 5);
1854 U_STRING_INIT(k_type_intvector, "intvector", 9);
1855 U_STRING_INIT(k_type_import, "import", 6);
1856 U_STRING_INIT(k_type_reserved, "reserved", 8);
1857 U_STRING_INIT(k_type_include, "include", 7);
1858
1859 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1860 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1861 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1862 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1863
1864 gOmitCollationRules = omitCollationRules;
1865 }
1866
1867 static inline UBool isTable(enum EResourceType type) {
1868 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1869 }
1870
1871 static enum EResourceType
1872 parseResourceType(ParseState* state, UErrorCode *status)
1873 {
1874 struct UString *tokenValue;
1875 struct UString comment;
1876 enum EResourceType result = RT_UNKNOWN;
1877 uint32_t line=0;
1878 ustr_init(&comment);
1879 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1880
1881 if (U_FAILURE(*status))
1882 {
1883 return RT_UNKNOWN;
1884 }
1885
1886 *status = U_ZERO_ERROR;
1887
1888 /* Search for normal types */
1889 result=RT_UNKNOWN;
1890 while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
1891 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1892 break;
1893 }
1894 }
1895 /* Now search for the aliases */
1896 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1897 result = RT_INTEGER;
1898 }
1899 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1900 result = RT_BINARY;
1901 }
1902 else if (result == RT_RESERVED) {
1903 char tokenBuffer[1024];
1904 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1905 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1906 *status = U_INVALID_FORMAT_ERROR;
1907 error(line, "unknown resource type '%s'", tokenBuffer);
1908 }
1909
1910 return result;
1911 }
1912
1913 /* parse a non-top-level resource */
1914 static struct SResource *
1915 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1916 {
1917 enum ETokenType token;
1918 enum EResourceType resType = RT_UNKNOWN;
1919 ParseResourceFunction *parseFunction = NULL;
1920 struct UString *tokenValue;
1921 uint32_t startline;
1922 uint32_t line;
1923
1924
1925 token = getToken(state, &tokenValue, NULL, &startline, status);
1926
1927 if(isVerbose()){
1928 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1929 }
1930
1931 /* name . [ ':' type ] '{' resource '}' */
1932 /* This function parses from the colon onwards. If the colon is present, parse the
1933 type then try to parse a resource of that type. If there is no explicit type,
1934 work it out using the lookahead tokens. */
1935 switch (token)
1936 {
1937 case TOK_EOF:
1938 *status = U_INVALID_FORMAT_ERROR;
1939 error(startline, "Unexpected EOF encountered");
1940 return NULL;
1941
1942 case TOK_ERROR:
1943 *status = U_INVALID_FORMAT_ERROR;
1944 return NULL;
1945
1946 case TOK_COLON:
1947 resType = parseResourceType(state, status);
1948 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1949
1950 if (U_FAILURE(*status))
1951 {
1952 return NULL;
1953 }
1954
1955 break;
1956
1957 case TOK_OPEN_BRACE:
1958 break;
1959
1960 default:
1961 *status = U_INVALID_FORMAT_ERROR;
1962 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1963 return NULL;
1964 }
1965
1966
1967 if (resType == RT_UNKNOWN)
1968 {
1969 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1970 We could have any of the following:
1971 { { => array (nested)
1972 { :/} => array
1973 { string , => string array
1974
1975 { string { => table
1976
1977 { string :/{ => table
1978 { string } => string
1979 */
1980
1981 token = peekToken(state, 0, NULL, &line, NULL,status);
1982
1983 if (U_FAILURE(*status))
1984 {
1985 return NULL;
1986 }
1987
1988 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1989 {
1990 resType = RT_ARRAY;
1991 }
1992 else if (token == TOK_STRING)
1993 {
1994 token = peekToken(state, 1, NULL, &line, NULL, status);
1995
1996 if (U_FAILURE(*status))
1997 {
1998 return NULL;
1999 }
2000
2001 switch (token)
2002 {
2003 case TOK_COMMA: resType = RT_ARRAY; break;
2004 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
2005 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
2006 case TOK_COLON: resType = RT_TABLE; break;
2007 default:
2008 *status = U_INVALID_FORMAT_ERROR;
2009 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2010 return NULL;
2011 }
2012 }
2013 else
2014 {
2015 *status = U_INVALID_FORMAT_ERROR;
2016 error(line, "Unexpected token after '{'");
2017 return NULL;
2018 }
2019
2020 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2021 } else if(resType == RT_TABLE_NO_FALLBACK) {
2022 *status = U_INVALID_FORMAT_ERROR;
2023 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2024 return NULL;
2025 }
2026
2027
2028 /* We should now know what we need to parse next, so call the appropriate parser
2029 function and return. */
2030 parseFunction = gResourceTypes[resType].parseFunction;
2031 if (parseFunction != NULL) {
2032 return parseFunction(state, tag, startline, comment, status);
2033 }
2034 else {
2035 *status = U_INTERNAL_PROGRAM_ERROR;
2036 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2037 }
2038
2039 return NULL;
2040 }
2041
2042 /* parse the top-level resource */
2043 struct SRBRoot *
2044 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation,
2045 UErrorCode *status)
2046 {
2047 struct UString *tokenValue;
2048 struct UString comment;
2049 uint32_t line;
2050 enum EResourceType bundleType;
2051 enum ETokenType token;
2052 ParseState state;
2053 uint32_t i;
2054
2055
2056 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2057 {
2058 ustr_init(&state.lookahead[i].value);
2059 ustr_init(&state.lookahead[i].comment);
2060 }
2061
2062 initLookahead(&state, buf, status);
2063
2064 state.inputdir = inputDir;
2065 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2066 state.outputdir = outputDir;
2067 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2068 state.makeBinaryCollation = makeBinaryCollation;
2069
2070 ustr_init(&comment);
2071 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2072
2073 state.bundle = bundle_open(&comment, FALSE, status);
2074
2075 if (state.bundle == NULL || U_FAILURE(*status))
2076 {
2077 return NULL;
2078 }
2079
2080
2081 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2082
2083 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2084 token = getToken(&state, NULL, NULL, &line, status);
2085 if(token==TOK_COLON) {
2086 *status=U_ZERO_ERROR;
2087 bundleType=parseResourceType(&state, status);
2088
2089 if(isTable(bundleType))
2090 {
2091 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2092 }
2093 else
2094 {
2095 *status=U_PARSE_ERROR;
2096 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2097 }
2098 }
2099 else
2100 {
2101 /* not a colon */
2102 if(token==TOK_OPEN_BRACE)
2103 {
2104 *status=U_ZERO_ERROR;
2105 bundleType=RT_TABLE;
2106 }
2107 else
2108 {
2109 /* neither colon nor open brace */
2110 *status=U_PARSE_ERROR;
2111 bundleType=RT_UNKNOWN;
2112 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2113 }
2114 }
2115
2116 if (U_FAILURE(*status))
2117 {
2118 bundle_close(state.bundle, status);
2119 return NULL;
2120 }
2121
2122 if(bundleType==RT_TABLE_NO_FALLBACK) {
2123 /*
2124 * Parse a top-level table with the table(nofallback) declaration.
2125 * This is the same as a regular table, but also sets the
2126 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2127 */
2128 state.bundle->noFallback=TRUE;
2129 }
2130 /* top-level tables need not handle special table names like "collations" */
2131 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2132 if(dependencyArray!=NULL){
2133 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2134 dependencyArray = NULL;
2135 }
2136 if (U_FAILURE(*status))
2137 {
2138 bundle_close(state.bundle, status);
2139 res_close(dependencyArray);
2140 return NULL;
2141 }
2142
2143 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2144 {
2145 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2146 if(isStrict()){
2147 *status = U_INVALID_FORMAT_ERROR;
2148 return NULL;
2149 }
2150 }
2151
2152 cleanupLookahead(&state);
2153 ustr_deinit(&comment);
2154 return state.bundle;
2155 }