]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/parse.c
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "genrb.h"
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
36 #include <stdio.h>
37
38 extern UBool gIncludeUnihanColl;
39
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code points that get special treatment while reading rule files */
#define CR              0x000D  /* carriage return */
#define LF              0x000A  /* line feed */
#define SPACE           0x0020  /* space */
#define TAB             0x0009  /* horizontal tab */
#define ESCAPE          0x005C  /* backslash */
#define HASH            0x0023  /* number sign, starts a comment */
#define QUOTE           0x0027  /* apostrophe, toggles quoting */
#define ZERO            0x0030  /* digit zero */
#define STARTCOMMAND    0x005B  /* opening square bracket */
#define ENDCOMMAND      0x005D  /* closing square bracket */
#define OPENSQBRACKET   0x005B  /* same code point as STARTCOMMAND */
#define CLOSESQBRACKET  0x005D  /* same code point as ENDCOMMAND */
55
56 struct Lookahead
57 {
58 enum ETokenType type;
59 struct UString value;
60 struct UString comment;
61 uint32_t line;
62 };
63
64 /* keep in sync with token defines in read.h */
65 const char *tokenNames[TOK_TOKEN_COUNT] =
66 {
67 "string", /* A string token, such as "MonthNames" */
68 "'{'", /* An opening brace character */
69 "'}'", /* A closing brace character */
70 "','", /* A comma */
71 "':'", /* A colon */
72
73 "<end of file>", /* End of the file has been reached successfully */
74 "<end of line>"
75 };
76
77 /* Just to store "TRUE" */
78 static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
79
80 typedef struct {
81 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
82 uint32_t lookaheadPosition;
83 UCHARBUF *buffer;
84 struct SRBRoot *bundle;
85 const char *inputdir;
86 uint32_t inputdirLength;
87 const char *outputdir;
88 uint32_t outputdirLength;
89 } ParseState;
90
91 static UBool gMakeBinaryCollation = TRUE;
92 static UBool gOmitCollationRules = FALSE;
93
94 typedef struct SResource *
95 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
96
97 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
98
99 /* The nature of the lookahead buffer:
100 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
101 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
102 When getToken is called, the current pointer is moved to the next slot and the
103 old slot is filled with the next token from the reader by calling getNextToken.
104 The token values are stored in the slot, which means that token values don't
105 survive a call to getToken, ie.
106
107 UString *value;
108
109 getToken(&value, NULL, status);
110 getToken(NULL, NULL, status); bad - value is now a different string
111 */
112 static void
113 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
114 {
115 static uint32_t initTypeStrings = 0;
116 uint32_t i;
117
118 if (!initTypeStrings)
119 {
120 initTypeStrings = 1;
121 }
122
123 state->lookaheadPosition = 0;
124 state->buffer = buf;
125
126 resetLineNumber();
127
128 for (i = 0; i < MAX_LOOKAHEAD; i++)
129 {
130 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
131 if (U_FAILURE(*status))
132 {
133 return;
134 }
135 }
136
137 *status = U_ZERO_ERROR;
138 }
139
140 static void
141 cleanupLookahead(ParseState* state)
142 {
143 uint32_t i;
144 for (i = 0; i < MAX_LOOKAHEAD; i++)
145 {
146 ustr_deinit(&state->lookahead[i].value);
147 ustr_deinit(&state->lookahead[i].comment);
148 }
149
150 }
151
152 static enum ETokenType
153 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
154 {
155 enum ETokenType result;
156 uint32_t i;
157
158 result = state->lookahead[state->lookaheadPosition].type;
159
160 if (tokenValue != NULL)
161 {
162 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
163 }
164
165 if (linenumber != NULL)
166 {
167 *linenumber = state->lookahead[state->lookaheadPosition].line;
168 }
169
170 if (comment != NULL)
171 {
172 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
173 }
174
175 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
176 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
177 ustr_setlen(&state->lookahead[i].comment, 0, status);
178 ustr_setlen(&state->lookahead[i].value, 0, status);
179 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
180
181 /* printf("getToken, returning %s\n", tokenNames[result]); */
182
183 return result;
184 }
185
186 static enum ETokenType
187 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
188 {
189 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
190
191 if (U_FAILURE(*status))
192 {
193 return TOK_ERROR;
194 }
195
196 if (lookaheadCount >= MAX_LOOKAHEAD)
197 {
198 *status = U_INTERNAL_PROGRAM_ERROR;
199 return TOK_ERROR;
200 }
201
202 if (tokenValue != NULL)
203 {
204 *tokenValue = &state->lookahead[i].value;
205 }
206
207 if (linenumber != NULL)
208 {
209 *linenumber = state->lookahead[i].line;
210 }
211
212 if(comment != NULL){
213 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
214 }
215
216 return state->lookahead[i].type;
217 }
218
219 static void
220 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
221 {
222 uint32_t line;
223
224 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
225
226 if (linenumber != NULL)
227 {
228 *linenumber = line;
229 }
230
231 if (U_FAILURE(*status))
232 {
233 return;
234 }
235
236 if (token != expectedToken)
237 {
238 *status = U_INVALID_FORMAT_ERROR;
239 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
240 }
241 else
242 {
243 *status = U_ZERO_ERROR;
244 }
245 }
246
247 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
248 {
249 struct UString *tokenValue;
250 char *result;
251 uint32_t count;
252
253 expect(state, TOK_STRING, &tokenValue, comment, line, status);
254
255 if (U_FAILURE(*status))
256 {
257 return NULL;
258 }
259
260 count = u_strlen(tokenValue->fChars);
261 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
262 *status = U_INVALID_FORMAT_ERROR;
263 error(*line, "invariant characters required for table keys, binary data, etc.");
264 return NULL;
265 }
266
267 result = uprv_malloc(count+1);
268
269 if (result == NULL)
270 {
271 *status = U_MEMORY_ALLOCATION_ERROR;
272 return NULL;
273 }
274
275 u_UCharsToChars(tokenValue->fChars, result, count+1);
276 return result;
277 }
278
279 static struct SResource *
280 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
281 {
282 struct SResource *result = NULL;
283 struct UString *tokenValue;
284 FileStream *file = NULL;
285 char filename[256] = { '\0' };
286 char cs[128] = { '\0' };
287 uint32_t line;
288 int len=0;
289 UBool quoted = FALSE;
290 UCHARBUF *ucbuf=NULL;
291 UChar32 c = 0;
292 const char* cp = NULL;
293 UChar *pTarget = NULL;
294 UChar *target = NULL;
295 UChar *targetLimit = NULL;
296 int32_t size = 0;
297
298 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
299
300 if(isVerbose()){
301 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
302 }
303
304 if (U_FAILURE(*status))
305 {
306 return NULL;
307 }
308 /* make the filename including the directory */
309 if (state->inputdir != NULL)
310 {
311 uprv_strcat(filename, state->inputdir);
312
313 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
314 {
315 uprv_strcat(filename, U_FILE_SEP_STRING);
316 }
317 }
318
319 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
320
321 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
322
323 if (U_FAILURE(*status))
324 {
325 return NULL;
326 }
327 uprv_strcat(filename, cs);
328
329 if(gOmitCollationRules) {
330 return res_none();
331 }
332
333 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
334
335 if (U_FAILURE(*status)) {
336 error(line, "An error occured while opening the input file %s\n", filename);
337 return NULL;
338 }
339
340 /* We allocate more space than actually required
341 * since the actual size needed for storing UChars
342 * is not known in UTF-8 byte stream
343 */
344 size = ucbuf_size(ucbuf) + 1;
345 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
346 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
347 target = pTarget;
348 targetLimit = pTarget+size;
349
350 /* read the rules into the buffer */
351 while (target < targetLimit)
352 {
353 c = ucbuf_getc(ucbuf, status);
354 if(c == QUOTE) {
355 quoted = (UBool)!quoted;
356 }
357 /* weiv (06/26/2002): adding the following:
358 * - preserving spaces in commands [...]
359 * - # comments until the end of line
360 */
361 if (c == STARTCOMMAND && !quoted)
362 {
363 /* preserve commands
364 * closing bracket will be handled by the
365 * append at the end of the loop
366 */
367 while(c != ENDCOMMAND) {
368 U_APPEND_CHAR32(c, target,len);
369 c = ucbuf_getc(ucbuf, status);
370 }
371 }
372 else if (c == HASH && !quoted) {
373 /* skip comments */
374 while(c != CR && c != LF) {
375 c = ucbuf_getc(ucbuf, status);
376 }
377 continue;
378 }
379 else if (c == ESCAPE)
380 {
381 c = unescape(ucbuf, status);
382
383 if (c == U_ERR)
384 {
385 uprv_free(pTarget);
386 T_FileStream_close(file);
387 return NULL;
388 }
389 }
390 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
391 {
392 /* ignore spaces carriage returns
393 * and line feed unless in the form \uXXXX
394 */
395 continue;
396 }
397
398 /* Append UChar * after dissembling if c > 0xffff*/
399 if (c != U_EOF)
400 {
401 U_APPEND_CHAR32(c, target,len);
402 }
403 else
404 {
405 break;
406 }
407 }
408
409 /* terminate the string */
410 if(target < targetLimit){
411 *target = 0x0000;
412 }
413
414 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
415
416
417 ucbuf_close(ucbuf);
418 uprv_free(pTarget);
419 T_FileStream_close(file);
420
421 return result;
422 }
423
424 static struct SResource *
425 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
426 {
427 struct SResource *result = NULL;
428 struct UString *tokenValue;
429 FileStream *file = NULL;
430 char filename[256] = { '\0' };
431 char cs[128] = { '\0' };
432 uint32_t line;
433 UCHARBUF *ucbuf=NULL;
434 const char* cp = NULL;
435 UChar *pTarget = NULL;
436 const UChar *pSource = NULL;
437 int32_t size = 0;
438
439 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
440
441 if(isVerbose()){
442 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
443 }
444
445 if (U_FAILURE(*status))
446 {
447 return NULL;
448 }
449 /* make the filename including the directory */
450 if (state->inputdir != NULL)
451 {
452 uprv_strcat(filename, state->inputdir);
453
454 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
455 {
456 uprv_strcat(filename, U_FILE_SEP_STRING);
457 }
458 }
459
460 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
461
462 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
463
464 if (U_FAILURE(*status))
465 {
466 return NULL;
467 }
468 uprv_strcat(filename, cs);
469
470
471 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
472
473 if (U_FAILURE(*status)) {
474 error(line, "An error occured while opening the input file %s\n", filename);
475 return NULL;
476 }
477
478 /* We allocate more space than actually required
479 * since the actual size needed for storing UChars
480 * is not known in UTF-8 byte stream
481 */
482 pSource = ucbuf_getBuffer(ucbuf, &size, status);
483 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
484 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
485
486 #if !UCONFIG_NO_TRANSLITERATION
487 size = utrans_stripRules(pSource, size, pTarget, status);
488 #else
489 size = 0;
490 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
491 #endif
492 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
493
494 ucbuf_close(ucbuf);
495 uprv_free(pTarget);
496 T_FileStream_close(file);
497
498 return result;
499 }
500 static struct SResource* dependencyArray = NULL;
501
502 static struct SResource *
503 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
504 {
505 struct SResource *result = NULL;
506 struct SResource *elem = NULL;
507 struct UString *tokenValue;
508 uint32_t line;
509 char filename[256] = { '\0' };
510 char cs[128] = { '\0' };
511
512 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
513
514 if(isVerbose()){
515 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
516 }
517
518 if (U_FAILURE(*status))
519 {
520 return NULL;
521 }
522 /* make the filename including the directory */
523 if (state->outputdir != NULL)
524 {
525 uprv_strcat(filename, state->outputdir);
526
527 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
528 {
529 uprv_strcat(filename, U_FILE_SEP_STRING);
530 }
531 }
532
533 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
534
535 if (U_FAILURE(*status))
536 {
537 return NULL;
538 }
539 uprv_strcat(filename, cs);
540 if(!T_FileStream_file_exists(filename)){
541 if(isStrict()){
542 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
543 }else{
544 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
545 }
546 }
547 if(dependencyArray==NULL){
548 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
549 }
550 if(tag!=NULL){
551 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
552 }
553 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
554
555 array_add(dependencyArray, elem, status);
556
557 if (U_FAILURE(*status))
558 {
559 return NULL;
560 }
561 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
562 return result;
563 }
564 static struct SResource *
565 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
566 {
567 struct UString *tokenValue;
568 struct SResource *result = NULL;
569
570 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
571 {
572 return parseUCARules(tag, startline, status);
573 }*/
574 if(isVerbose()){
575 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
576 }
577 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
578
579 if (U_SUCCESS(*status))
580 {
581 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
582 doesn't survive expect either) */
583
584 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
585 if(U_SUCCESS(*status) && result) {
586 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
587
588 if (U_FAILURE(*status))
589 {
590 res_close(result);
591 return NULL;
592 }
593 }
594 }
595
596 return result;
597 }
598
599 static struct SResource *
600 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
601 {
602 struct UString *tokenValue;
603 struct SResource *result = NULL;
604
605 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
606
607 if(isVerbose()){
608 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
609 }
610
611 if (U_SUCCESS(*status))
612 {
613 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
614 doesn't survive expect either) */
615
616 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
617
618 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
619
620 if (U_FAILURE(*status))
621 {
622 res_close(result);
623 return NULL;
624 }
625 }
626
627 return result;
628 }
629
/* Context handed to importFromDataFile(): the directories of the running parse. */
typedef struct {
    const char *inputDir;   /* directory that imported files are read from; may be NULL */
    const char *outputDir;  /* output directory, passed through to parse() */
} GenrbData;
634
635 static struct SResource* resLookup(struct SResource* res, const char* key){
636 struct SResource *current = NULL;
637 struct SResTable *list;
638 if (res == res_none()) {
639 return NULL;
640 }
641
642 list = &(res->u.fTable);
643
644 current = list->fFirst;
645 while (current != NULL) {
646 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
647 return current;
648 }
649 current = current->fNext;
650 }
651 return NULL;
652 }
653
654 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
655 struct SRBRoot *data = NULL;
656 UCHARBUF *ucbuf = NULL;
657 GenrbData* genrbdata = (GenrbData*) context;
658 int localeLength = strlen(locale);
659 char* filename = (char*)uprv_malloc(localeLength+5);
660 char *inputDirBuf = NULL;
661 char *openFileName = NULL;
662 const char* cp = "";
663 UChar* urules = NULL;
664 int32_t urulesLength = 0;
665 int32_t i = 0;
666 int32_t dirlen = 0;
667 int32_t filelen = 0;
668 struct SResource* root;
669 struct SResource* collations;
670 struct SResource* collation;
671 struct SResource* sequence;
672
673 memcpy(filename, locale, localeLength);
674 for(i = 0; i < localeLength; i++){
675 if(filename[i] == '-'){
676 filename[i] = '_';
677 }
678 }
679 filename[localeLength] = '.';
680 filename[localeLength+1] = 't';
681 filename[localeLength+2] = 'x';
682 filename[localeLength+3] = 't';
683 filename[localeLength+4] = 0;
684
685
686 if (status==NULL || U_FAILURE(*status)) {
687 return NULL;
688 }
689 if(filename==NULL){
690 *status=U_ILLEGAL_ARGUMENT_ERROR;
691 return NULL;
692 }else{
693 filelen = (int32_t)uprv_strlen(filename);
694 }
695 if(genrbdata->inputDir == NULL) {
696 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
697 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
698 openFileName[0] = '\0';
699 if (filenameBegin != NULL) {
700 /*
701 * When a filename ../../../data/root.txt is specified,
702 * we presume that the input directory is ../../../data
703 * This is very important when the resource file includes
704 * another file, like UCARules.txt or thaidict.brk.
705 */
706 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
707 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
708
709 /* test for NULL */
710 if(inputDirBuf == NULL) {
711 *status = U_MEMORY_ALLOCATION_ERROR;
712 goto finish;
713 }
714
715 inputDirBuf[filenameSize - 1] = 0;
716 genrbdata->inputDir = inputDirBuf;
717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
718 }
719 }else{
720 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
721
722 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
723 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
724
725 /* test for NULL */
726 if(openFileName == NULL) {
727 *status = U_MEMORY_ALLOCATION_ERROR;
728 goto finish;
729 }
730
731 openFileName[0] = '\0';
732 /*
733 * append the input dir to openFileName if the first char in
734 * filename is not file seperation char and the last char input directory is not '.'.
735 * This is to support :
736 * genrb -s. /home/icu/data
737 * genrb -s. icu/data
738 * The user cannot mix notations like
739 * genrb -s. /icu/data --- the absolute path specified. -s redundant
740 * user should use
741 * genrb -s. icu/data --- start from CWD and look in icu/data dir
742 */
743 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
744 uprv_strcpy(openFileName, genrbdata->inputDir);
745 openFileName[dirlen] = U_FILE_SEP_CHAR;
746 }
747 openFileName[dirlen + 1] = '\0';
748 } else {
749 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
750
751 /* test for NULL */
752 if(openFileName == NULL) {
753 *status = U_MEMORY_ALLOCATION_ERROR;
754 goto finish;
755 }
756
757 uprv_strcpy(openFileName, genrbdata->inputDir);
758
759 }
760 }
761 uprv_strcat(openFileName, filename);
762 /* printf("%s\n", openFileName); */
763 *status = U_ZERO_ERROR;
764 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
765
766 if(*status == U_FILE_ACCESS_ERROR) {
767
768 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
769 goto finish;
770 }
771 if (ucbuf == NULL || U_FAILURE(*status)) {
772 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
773 goto finish;
774 }
775
776 /* Parse the data into an SRBRoot */
777 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, status);
778
779 root = data->fRoot;
780 collations = resLookup(root, "collations");
781 collation = resLookup(collations, type);
782 sequence = resLookup(collation, "Sequence");
783 urules = sequence->u.fString.fChars;
784 urulesLength = sequence->u.fString.fLength;
785 *pLength = urulesLength;
786
787 finish:
788
789 if (inputDirBuf != NULL) {
790 uprv_free(inputDirBuf);
791 }
792
793 if (openFileName != NULL) {
794 uprv_free(openFileName);
795 }
796
797 if(ucbuf) {
798 ucbuf_close(ucbuf);
799 }
800
801 return urules;
802 }
803
804 static struct SResource *
805 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
806 {
807 struct SResource *member = NULL;
808 struct UString *tokenValue;
809 struct UString comment;
810 enum ETokenType token;
811 char subtag[1024];
812 UVersionInfo version;
813 UBool override = FALSE;
814 uint32_t line;
815 GenrbData genrbdata;
816 /* '{' . (name resource)* '}' */
817 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
818
819 for (;;)
820 {
821 ustr_init(&comment);
822 token = getToken(state, &tokenValue, &comment, &line, status);
823
824 if (token == TOK_CLOSE_BRACE)
825 {
826 return result;
827 }
828
829 if (token != TOK_STRING)
830 {
831 res_close(result);
832 *status = U_INVALID_FORMAT_ERROR;
833
834 if (token == TOK_EOF)
835 {
836 error(startline, "unterminated table");
837 }
838 else
839 {
840 error(line, "Unexpected token %s", tokenNames[token]);
841 }
842
843 return NULL;
844 }
845
846 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
847
848 if (U_FAILURE(*status))
849 {
850 res_close(result);
851 return NULL;
852 }
853
854 member = parseResource(state, subtag, NULL, status);
855
856 if (U_FAILURE(*status))
857 {
858 res_close(result);
859 return NULL;
860 }
861
862 if (uprv_strcmp(subtag, "Version") == 0)
863 {
864 char ver[40];
865 int32_t length = member->u.fString.fLength;
866
867 if (length >= (int32_t) sizeof(ver))
868 {
869 length = (int32_t) sizeof(ver) - 1;
870 }
871
872 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
873 u_versionFromString(version, ver);
874
875 table_add(result, member, line, status);
876
877 }
878 else if (uprv_strcmp(subtag, "Override") == 0)
879 {
880 override = FALSE;
881
882 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
883 {
884 override = TRUE;
885 }
886 table_add(result, member, line, status);
887
888 }
889 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
890 {
891 /* discard duplicate %%CollationBin if any*/
892 }
893 else if (uprv_strcmp(subtag, "Sequence") == 0)
894 {
895 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
896 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
897 #else
898 if(gMakeBinaryCollation) {
899 UErrorCode intStatus = U_ZERO_ERROR;
900
901 /* do the collation elements */
902 int32_t len = 0;
903 uint8_t *data = NULL;
904 UCollator *coll = NULL;
905 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
906 uint32_t reorderCodeCount;
907 int32_t reorderCodeIndex;
908 UParseError parseError;
909
910 genrbdata.inputDir = state->inputdir;
911 genrbdata.outputDir = state->outputdir;
912
913 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
914 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
915
916 if (U_SUCCESS(intStatus) && coll != NULL)
917 {
918 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
919 data = (uint8_t *)uprv_malloc(len);
920 intStatus = U_ZERO_ERROR;
921 len = ucol_cloneBinary(coll, data, len, &intStatus);
922 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
923
924 /* tailoring rules version */
925 /* This is wrong! */
926 /*coll->dataInfo.dataVersion[1] = version[0];*/
927 /* Copy tailoring version. Builder version already */
928 /* set in ucol_openRules */
929 ((UCATableHeader *)data)->version[1] = version[0];
930 ((UCATableHeader *)data)->version[2] = version[1];
931 ((UCATableHeader *)data)->version[3] = version[2];
932
933 if (U_SUCCESS(intStatus) && data != NULL)
934 {
935 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
936 table_add(result, collationBin, line, status);
937 uprv_free(data);
938
939 reorderCodeCount = ucol_getReorderCodes(
940 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
941 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
942 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
943 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
944 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
945 }
946 table_add(result, reorderCodeRes, line, status);
947 }
948 }
949 else
950 {
951 warning(line, "could not obtain rules from collator");
952 if(isStrict()){
953 *status = U_INVALID_FORMAT_ERROR;
954 return NULL;
955 }
956 }
957
958 ucol_close(coll);
959 }
960 else
961 {
962 if(intStatus == U_FILE_ACCESS_ERROR) {
963 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
964 *status = intStatus;
965 return NULL;
966 }
967 warning(line, "%%Collation could not be constructed from CollationElements - check context!");
968 if(isStrict()){
969 *status = intStatus;
970 return NULL;
971 }
972 }
973 } else {
974 if(isVerbose()) {
975 printf("Not building Collation binary\n");
976 }
977 }
978 #endif
979 /* in order to achieve smaller data files, we can direct genrb */
980 /* to omit collation rules */
981 if(gOmitCollationRules) {
982 bundle_closeString(state->bundle, member);
983 } else {
984 table_add(result, member, line, status);
985 }
986 }
987
988 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
989
990 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
991
992 if (U_FAILURE(*status))
993 {
994 res_close(result);
995 return NULL;
996 }
997 }
998
999 /* not reached */
1000 /* A compiler warning will appear if all paths don't contain a return statement. */
1001 /* *status = U_INTERNAL_PROGRAM_ERROR;
1002 return NULL;*/
1003 }
1004
1005 static struct SResource *
1006 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1007 {
1008 struct SResource *result = NULL;
1009 struct SResource *member = NULL;
1010 struct SResource *collationRes = NULL;
1011 struct UString *tokenValue;
1012 struct UString comment;
1013 enum ETokenType token;
1014 char subtag[1024], typeKeyword[1024];
1015 uint32_t line;
1016
1017 result = table_open(state->bundle, tag, NULL, status);
1018
1019 if (result == NULL || U_FAILURE(*status))
1020 {
1021 return NULL;
1022 }
1023 if(isVerbose()){
1024 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1025 }
1026 if(!newCollation) {
1027 return addCollation(state, result, startline, status);
1028 }
1029 else {
1030 for(;;) {
1031 ustr_init(&comment);
1032 token = getToken(state, &tokenValue, &comment, &line, status);
1033
1034 if (token == TOK_CLOSE_BRACE)
1035 {
1036 return result;
1037 }
1038
1039 if (token != TOK_STRING)
1040 {
1041 res_close(result);
1042 *status = U_INVALID_FORMAT_ERROR;
1043
1044 if (token == TOK_EOF)
1045 {
1046 error(startline, "unterminated table");
1047 }
1048 else
1049 {
1050 error(line, "Unexpected token %s", tokenNames[token]);
1051 }
1052
1053 return NULL;
1054 }
1055
1056 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1057
1058 if (U_FAILURE(*status))
1059 {
1060 res_close(result);
1061 return NULL;
1062 }
1063
1064 if (uprv_strcmp(subtag, "default") == 0)
1065 {
1066 member = parseResource(state, subtag, NULL, status);
1067
1068 if (U_FAILURE(*status))
1069 {
1070 res_close(result);
1071 return NULL;
1072 }
1073
1074 table_add(result, member, line, status);
1075 }
1076 else
1077 {
1078 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1079 /* this probably needs to be refactored or recursively use the parser */
1080 /* first we assume that our collation table won't have the explicit type */
1081 /* then, we cannot handle aliases */
1082 if(token == TOK_OPEN_BRACE) {
1083 token = getToken(state, &tokenValue, &comment, &line, status);
1084 collationRes = table_open(state->bundle, subtag, NULL, status);
1085 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1086 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1087 table_add(result, collationRes, startline, status);
1088 }
1089 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1090 /* we could have a table too */
1091 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1092 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1093 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1094 member = parseResource(state, subtag, NULL, status);
1095
1096 if (U_FAILURE(*status))
1097 {
1098 res_close(result);
1099 return NULL;
1100 }
1101
1102 table_add(result, member, line, status);
1103 } else {
1104 res_close(result);
1105 *status = U_INVALID_FORMAT_ERROR;
1106 return NULL;
1107 }
1108 } else {
1109 res_close(result);
1110 *status = U_INVALID_FORMAT_ERROR;
1111 return NULL;
1112 }
1113 }
1114
1115 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1116
1117 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1118
1119 if (U_FAILURE(*status))
1120 {
1121 res_close(result);
1122 return NULL;
1123 }
1124 }
1125 }
1126 }
1127
1128 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1129 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1130 static struct SResource *
1131 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1132 {
1133 struct SResource *member = NULL;
1134 struct UString *tokenValue=NULL;
1135 struct UString comment;
1136 enum ETokenType token;
1137 char subtag[1024];
1138 uint32_t line;
1139 UBool readToken = FALSE;
1140
1141 /* '{' . (name resource)* '}' */
1142 if(isVerbose()){
1143 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1144 }
1145 for (;;)
1146 {
1147 ustr_init(&comment);
1148 token = getToken(state, &tokenValue, &comment, &line, status);
1149
1150 if (token == TOK_CLOSE_BRACE)
1151 {
1152 if (!readToken) {
1153 warning(startline, "Encountered empty table");
1154 }
1155 return table;
1156 }
1157
1158 if (token != TOK_STRING)
1159 {
1160 *status = U_INVALID_FORMAT_ERROR;
1161
1162 if (token == TOK_EOF)
1163 {
1164 error(startline, "unterminated table");
1165 }
1166 else
1167 {
1168 error(line, "unexpected token %s", tokenNames[token]);
1169 }
1170
1171 return NULL;
1172 }
1173
1174 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1175 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1176 } else {
1177 *status = U_INVALID_FORMAT_ERROR;
1178 error(line, "invariant characters required for table keys");
1179 return NULL;
1180 }
1181
1182 if (U_FAILURE(*status))
1183 {
1184 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1185 return NULL;
1186 }
1187
1188 member = parseResource(state, subtag, &comment, status);
1189
1190 if (member == NULL || U_FAILURE(*status))
1191 {
1192 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1193 return NULL;
1194 }
1195
1196 table_add(table, member, line, status);
1197
1198 if (U_FAILURE(*status))
1199 {
1200 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1201 return NULL;
1202 }
1203 readToken = TRUE;
1204 ustr_deinit(&comment);
1205 }
1206
1207 /* not reached */
1208 /* A compiler warning will appear if all paths don't contain a return statement. */
1209 /* *status = U_INTERNAL_PROGRAM_ERROR;
1210 return NULL;*/
1211 }
1212
1213 static struct SResource *
1214 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1215 {
1216 struct SResource *result;
1217
1218 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1219 {
1220 return parseCollationElements(state, tag, startline, FALSE, status);
1221 }
1222 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1223 {
1224 return parseCollationElements(state, tag, startline, TRUE, status);
1225 }
1226 if(isVerbose()){
1227 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1228 }
1229
1230 result = table_open(state->bundle, tag, comment, status);
1231
1232 if (result == NULL || U_FAILURE(*status))
1233 {
1234 return NULL;
1235 }
1236
1237 return realParseTable(state, result, tag, startline, status);
1238 }
1239
1240 static struct SResource *
1241 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1242 {
1243 struct SResource *result = NULL;
1244 struct SResource *member = NULL;
1245 struct UString *tokenValue;
1246 struct UString memberComments;
1247 enum ETokenType token;
1248 UBool readToken = FALSE;
1249
1250 result = array_open(state->bundle, tag, comment, status);
1251
1252 if (result == NULL || U_FAILURE(*status))
1253 {
1254 return NULL;
1255 }
1256 if(isVerbose()){
1257 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1258 }
1259
1260 ustr_init(&memberComments);
1261
1262 /* '{' . resource [','] '}' */
1263 for (;;)
1264 {
1265 /* reset length */
1266 ustr_setlen(&memberComments, 0, status);
1267
1268 /* check for end of array, but don't consume next token unless it really is the end */
1269 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1270
1271
1272 if (token == TOK_CLOSE_BRACE)
1273 {
1274 getToken(state, NULL, NULL, NULL, status);
1275 if (!readToken) {
1276 warning(startline, "Encountered empty array");
1277 }
1278 break;
1279 }
1280
1281 if (token == TOK_EOF)
1282 {
1283 res_close(result);
1284 *status = U_INVALID_FORMAT_ERROR;
1285 error(startline, "unterminated array");
1286 return NULL;
1287 }
1288
1289 /* string arrays are a special case */
1290 if (token == TOK_STRING)
1291 {
1292 getToken(state, &tokenValue, &memberComments, NULL, status);
1293 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1294 }
1295 else
1296 {
1297 member = parseResource(state, NULL, &memberComments, status);
1298 }
1299
1300 if (member == NULL || U_FAILURE(*status))
1301 {
1302 res_close(result);
1303 return NULL;
1304 }
1305
1306 array_add(result, member, status);
1307
1308 if (U_FAILURE(*status))
1309 {
1310 res_close(result);
1311 return NULL;
1312 }
1313
1314 /* eat optional comma if present */
1315 token = peekToken(state, 0, NULL, NULL, NULL, status);
1316
1317 if (token == TOK_COMMA)
1318 {
1319 getToken(state, NULL, NULL, NULL, status);
1320 }
1321
1322 if (U_FAILURE(*status))
1323 {
1324 res_close(result);
1325 return NULL;
1326 }
1327 readToken = TRUE;
1328 }
1329
1330 ustr_deinit(&memberComments);
1331 return result;
1332 }
1333
1334 static struct SResource *
1335 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1336 {
1337 struct SResource *result = NULL;
1338 enum ETokenType token;
1339 char *string;
1340 int32_t value;
1341 UBool readToken = FALSE;
1342 char *stopstring;
1343 uint32_t len;
1344 struct UString memberComments;
1345
1346 result = intvector_open(state->bundle, tag, comment, status);
1347
1348 if (result == NULL || U_FAILURE(*status))
1349 {
1350 return NULL;
1351 }
1352
1353 if(isVerbose()){
1354 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1355 }
1356 ustr_init(&memberComments);
1357 /* '{' . string [','] '}' */
1358 for (;;)
1359 {
1360 ustr_setlen(&memberComments, 0, status);
1361
1362 /* check for end of array, but don't consume next token unless it really is the end */
1363 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1364
1365 if (token == TOK_CLOSE_BRACE)
1366 {
1367 /* it's the end, consume the close brace */
1368 getToken(state, NULL, NULL, NULL, status);
1369 if (!readToken) {
1370 warning(startline, "Encountered empty int vector");
1371 }
1372 ustr_deinit(&memberComments);
1373 return result;
1374 }
1375
1376 string = getInvariantString(state, NULL, NULL, status);
1377
1378 if (U_FAILURE(*status))
1379 {
1380 res_close(result);
1381 return NULL;
1382 }
1383
1384 /* For handling illegal char in the Intvector */
1385 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1386 len=(uint32_t)(stopstring-string);
1387
1388 if(len==uprv_strlen(string))
1389 {
1390 intvector_add(result, value, status);
1391 uprv_free(string);
1392 token = peekToken(state, 0, NULL, NULL, NULL, status);
1393 }
1394 else
1395 {
1396 uprv_free(string);
1397 *status=U_INVALID_CHAR_FOUND;
1398 }
1399
1400 if (U_FAILURE(*status))
1401 {
1402 res_close(result);
1403 return NULL;
1404 }
1405
1406 /* the comma is optional (even though it is required to prevent the reader from concatenating
1407 consecutive entries) so that a missing comma on the last entry isn't an error */
1408 if (token == TOK_COMMA)
1409 {
1410 getToken(state, NULL, NULL, NULL, status);
1411 }
1412 readToken = TRUE;
1413 }
1414
1415 /* not reached */
1416 /* A compiler warning will appear if all paths don't contain a return statement. */
1417 /* intvector_close(result, status);
1418 *status = U_INTERNAL_PROGRAM_ERROR;
1419 return NULL;*/
1420 }
1421
1422 static struct SResource *
1423 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1424 {
1425 struct SResource *result = NULL;
1426 uint8_t *value;
1427 char *string;
1428 char toConv[3] = {'\0', '\0', '\0'};
1429 uint32_t count;
1430 uint32_t i;
1431 uint32_t line;
1432 char *stopstring;
1433 uint32_t len;
1434
1435 string = getInvariantString(state, &line, NULL, status);
1436
1437 if (string == NULL || U_FAILURE(*status))
1438 {
1439 return NULL;
1440 }
1441
1442 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1443
1444 if (U_FAILURE(*status))
1445 {
1446 uprv_free(string);
1447 return NULL;
1448 }
1449
1450 if(isVerbose()){
1451 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1452 }
1453
1454 count = (uint32_t)uprv_strlen(string);
1455 if (count > 0){
1456 if((count % 2)==0){
1457 value = uprv_malloc(sizeof(uint8_t) * count);
1458
1459 if (value == NULL)
1460 {
1461 uprv_free(string);
1462 *status = U_MEMORY_ALLOCATION_ERROR;
1463 return NULL;
1464 }
1465
1466 for (i = 0; i < count; i += 2)
1467 {
1468 toConv[0] = string[i];
1469 toConv[1] = string[i + 1];
1470
1471 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1472 len=(uint32_t)(stopstring-toConv);
1473
1474 if(len!=uprv_strlen(toConv))
1475 {
1476 uprv_free(string);
1477 *status=U_INVALID_CHAR_FOUND;
1478 return NULL;
1479 }
1480 }
1481
1482 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1483
1484 uprv_free(value);
1485 }
1486 else
1487 {
1488 *status = U_INVALID_CHAR_FOUND;
1489 uprv_free(string);
1490 error(line, "Encountered invalid binary string");
1491 return NULL;
1492 }
1493 }
1494 else
1495 {
1496 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1497 warning(startline, "Encountered empty binary tag");
1498 }
1499 uprv_free(string);
1500
1501 return result;
1502 }
1503
1504 static struct SResource *
1505 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1506 {
1507 struct SResource *result = NULL;
1508 int32_t value;
1509 char *string;
1510 char *stopstring;
1511 uint32_t len;
1512
1513 string = getInvariantString(state, NULL, NULL, status);
1514
1515 if (string == NULL || U_FAILURE(*status))
1516 {
1517 return NULL;
1518 }
1519
1520 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1521
1522 if (U_FAILURE(*status))
1523 {
1524 uprv_free(string);
1525 return NULL;
1526 }
1527
1528 if(isVerbose()){
1529 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1530 }
1531
1532 if (uprv_strlen(string) <= 0)
1533 {
1534 warning(startline, "Encountered empty integer. Default value is 0.");
1535 }
1536
1537 /* Allow integer support for hexdecimal, octal digit and decimal*/
1538 /* and handle illegal char in the integer*/
1539 value = uprv_strtoul(string, &stopstring, 0);
1540 len=(uint32_t)(stopstring-string);
1541 if(len==uprv_strlen(string))
1542 {
1543 result = int_open(state->bundle, tag, value, comment, status);
1544 }
1545 else
1546 {
1547 *status=U_INVALID_CHAR_FOUND;
1548 }
1549 uprv_free(string);
1550
1551 return result;
1552 }
1553
1554 static struct SResource *
1555 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1556 {
1557 struct SResource *result;
1558 FileStream *file;
1559 int32_t len;
1560 uint8_t *data;
1561 char *filename;
1562 uint32_t line;
1563 char *fullname = NULL;
1564 int32_t numRead = 0;
1565 filename = getInvariantString(state, &line, NULL, status);
1566
1567 if (U_FAILURE(*status))
1568 {
1569 return NULL;
1570 }
1571
1572 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1573
1574 if (U_FAILURE(*status))
1575 {
1576 uprv_free(filename);
1577 return NULL;
1578 }
1579
1580 if(isVerbose()){
1581 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1582 }
1583
1584 /* Open the input file for reading */
1585 if (state->inputdir == NULL)
1586 {
1587 #if 1
1588 /*
1589 * Always save file file name, even if there's
1590 * no input directory specified. MIGHT BREAK SOMETHING
1591 */
1592 int32_t filenameLength = uprv_strlen(filename);
1593
1594 fullname = (char *) uprv_malloc(filenameLength + 1);
1595 uprv_strcpy(fullname, filename);
1596 #endif
1597
1598 file = T_FileStream_open(filename, "rb");
1599 }
1600 else
1601 {
1602
1603 int32_t count = (int32_t)uprv_strlen(filename);
1604
1605 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1606 {
1607 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1608
1609 /* test for NULL */
1610 if(fullname == NULL)
1611 {
1612 *status = U_MEMORY_ALLOCATION_ERROR;
1613 return NULL;
1614 }
1615
1616 uprv_strcpy(fullname, state->inputdir);
1617
1618 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1619 fullname[state->inputdirLength + 1] = '\0';
1620
1621 uprv_strcat(fullname, filename);
1622 }
1623 else
1624 {
1625 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1626
1627 /* test for NULL */
1628 if(fullname == NULL)
1629 {
1630 *status = U_MEMORY_ALLOCATION_ERROR;
1631 return NULL;
1632 }
1633
1634 uprv_strcpy(fullname, state->inputdir);
1635 uprv_strcat(fullname, filename);
1636 }
1637
1638 file = T_FileStream_open(fullname, "rb");
1639
1640 }
1641
1642 if (file == NULL)
1643 {
1644 error(line, "couldn't open input file %s", filename);
1645 *status = U_FILE_ACCESS_ERROR;
1646 return NULL;
1647 }
1648
1649 len = T_FileStream_size(file);
1650 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1651 /* test for NULL */
1652 if(data == NULL)
1653 {
1654 *status = U_MEMORY_ALLOCATION_ERROR;
1655 T_FileStream_close (file);
1656 return NULL;
1657 }
1658
1659 numRead = T_FileStream_read (file, data, len);
1660 T_FileStream_close (file);
1661
1662 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1663
1664 uprv_free(data);
1665 uprv_free(filename);
1666 uprv_free(fullname);
1667
1668 return result;
1669 }
1670
1671 static struct SResource *
1672 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1673 {
1674 struct SResource *result;
1675 int32_t len=0;
1676 char *filename;
1677 uint32_t line;
1678 UChar *pTarget = NULL;
1679
1680 UCHARBUF *ucbuf;
1681 char *fullname = NULL;
1682 int32_t count = 0;
1683 const char* cp = NULL;
1684 const UChar* uBuffer = NULL;
1685
1686 filename = getInvariantString(state, &line, NULL, status);
1687 count = (int32_t)uprv_strlen(filename);
1688
1689 if (U_FAILURE(*status))
1690 {
1691 return NULL;
1692 }
1693
1694 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1695
1696 if (U_FAILURE(*status))
1697 {
1698 uprv_free(filename);
1699 return NULL;
1700 }
1701
1702 if(isVerbose()){
1703 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1704 }
1705
1706 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1707 /* test for NULL */
1708 if(fullname == NULL)
1709 {
1710 *status = U_MEMORY_ALLOCATION_ERROR;
1711 uprv_free(filename);
1712 return NULL;
1713 }
1714
1715 if(state->inputdir!=NULL){
1716 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1717 {
1718
1719 uprv_strcpy(fullname, state->inputdir);
1720
1721 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1722 fullname[state->inputdirLength + 1] = '\0';
1723
1724 uprv_strcat(fullname, filename);
1725 }
1726 else
1727 {
1728 uprv_strcpy(fullname, state->inputdir);
1729 uprv_strcat(fullname, filename);
1730 }
1731 }else{
1732 uprv_strcpy(fullname,filename);
1733 }
1734
1735 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1736
1737 if (U_FAILURE(*status)) {
1738 error(line, "couldn't open input file %s\n", filename);
1739 return NULL;
1740 }
1741
1742 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1743 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1744
1745 uprv_free(pTarget);
1746
1747 uprv_free(filename);
1748 uprv_free(fullname);
1749
1750 return result;
1751 }
1752
1753
1754
1755
1756
1757 U_STRING_DECL(k_type_string, "string", 6);
1758 U_STRING_DECL(k_type_binary, "binary", 6);
1759 U_STRING_DECL(k_type_bin, "bin", 3);
1760 U_STRING_DECL(k_type_table, "table", 5);
1761 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1762 U_STRING_DECL(k_type_int, "int", 3);
1763 U_STRING_DECL(k_type_integer, "integer", 7);
1764 U_STRING_DECL(k_type_array, "array", 5);
1765 U_STRING_DECL(k_type_alias, "alias", 5);
1766 U_STRING_DECL(k_type_intvector, "intvector", 9);
1767 U_STRING_DECL(k_type_import, "import", 6);
1768 U_STRING_DECL(k_type_include, "include", 7);
1769 U_STRING_DECL(k_type_reserved, "reserved", 8);
1770
1771 /* Various non-standard processing plugins that create one or more special resources. */
1772 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1773 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1774 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1775 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1776
/*
 * Resource types known to the parser. The values are used as indexes into
 * gResourceTypes[], so the two must list their entries in the same order.
 */
typedef enum EResourceType
{
    RT_UNKNOWN,                 /* no explicit type given; worked out from lookahead */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* only valid on the top bundle level */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker for the type-name search */
} EResourceType;
1796
1797 static struct {
1798 const char *nameChars; /* only used for debugging */
1799 const UChar *nameUChars;
1800 ParseResourceFunction *parseFunction;
1801 } gResourceTypes[] = {
1802 {"Unknown", NULL, NULL},
1803 {"string", k_type_string, parseString},
1804 {"binary", k_type_binary, parseBinary},
1805 {"table", k_type_table, parseTable},
1806 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1807 {"integer", k_type_integer, parseInteger},
1808 {"array", k_type_array, parseArray},
1809 {"alias", k_type_alias, parseAlias},
1810 {"intvector", k_type_intvector, parseIntVector},
1811 {"import", k_type_import, parseImport},
1812 {"include", k_type_include, parseInclude},
1813 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1814 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1815 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1816 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1817 {"reserved", NULL, NULL}
1818 };
1819
1820 void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
1821 {
1822 U_STRING_INIT(k_type_string, "string", 6);
1823 U_STRING_INIT(k_type_binary, "binary", 6);
1824 U_STRING_INIT(k_type_bin, "bin", 3);
1825 U_STRING_INIT(k_type_table, "table", 5);
1826 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1827 U_STRING_INIT(k_type_int, "int", 3);
1828 U_STRING_INIT(k_type_integer, "integer", 7);
1829 U_STRING_INIT(k_type_array, "array", 5);
1830 U_STRING_INIT(k_type_alias, "alias", 5);
1831 U_STRING_INIT(k_type_intvector, "intvector", 9);
1832 U_STRING_INIT(k_type_import, "import", 6);
1833 U_STRING_INIT(k_type_reserved, "reserved", 8);
1834 U_STRING_INIT(k_type_include, "include", 7);
1835
1836 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1837 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1838 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1839 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1840
1841 gMakeBinaryCollation = !omitBinaryCollation;
1842 gOmitCollationRules = omitCollationRules;
1843 }
1844
1845 static U_INLINE UBool isTable(enum EResourceType type) {
1846 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1847 }
1848
1849 static enum EResourceType
1850 parseResourceType(ParseState* state, UErrorCode *status)
1851 {
1852 struct UString *tokenValue;
1853 struct UString comment;
1854 enum EResourceType result = RT_UNKNOWN;
1855 uint32_t line=0;
1856 ustr_init(&comment);
1857 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1858
1859 if (U_FAILURE(*status))
1860 {
1861 return RT_UNKNOWN;
1862 }
1863
1864 *status = U_ZERO_ERROR;
1865
1866 /* Search for normal types */
1867 result=RT_UNKNOWN;
1868 while (++result < RT_RESERVED) {
1869 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1870 break;
1871 }
1872 }
1873 /* Now search for the aliases */
1874 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1875 result = RT_INTEGER;
1876 }
1877 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1878 result = RT_BINARY;
1879 }
1880 else if (result == RT_RESERVED) {
1881 char tokenBuffer[1024];
1882 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1883 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1884 *status = U_INVALID_FORMAT_ERROR;
1885 error(line, "unknown resource type '%s'", tokenBuffer);
1886 }
1887
1888 return result;
1889 }
1890
1891 /* parse a non-top-level resource */
1892 static struct SResource *
1893 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1894 {
1895 enum ETokenType token;
1896 enum EResourceType resType = RT_UNKNOWN;
1897 ParseResourceFunction *parseFunction = NULL;
1898 struct UString *tokenValue;
1899 uint32_t startline;
1900 uint32_t line;
1901
1902 token = getToken(state, &tokenValue, NULL, &startline, status);
1903
1904 if(isVerbose()){
1905 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1906 }
1907
1908 /* name . [ ':' type ] '{' resource '}' */
1909 /* This function parses from the colon onwards. If the colon is present, parse the
1910 type then try to parse a resource of that type. If there is no explicit type,
1911 work it out using the lookahead tokens. */
1912 switch (token)
1913 {
1914 case TOK_EOF:
1915 *status = U_INVALID_FORMAT_ERROR;
1916 error(startline, "Unexpected EOF encountered");
1917 return NULL;
1918
1919 case TOK_ERROR:
1920 *status = U_INVALID_FORMAT_ERROR;
1921 return NULL;
1922
1923 case TOK_COLON:
1924 resType = parseResourceType(state, status);
1925 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1926
1927 if (U_FAILURE(*status))
1928 {
1929 return NULL;
1930 }
1931
1932 break;
1933
1934 case TOK_OPEN_BRACE:
1935 break;
1936
1937 default:
1938 *status = U_INVALID_FORMAT_ERROR;
1939 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1940 return NULL;
1941 }
1942
1943 if (resType == RT_UNKNOWN)
1944 {
1945 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1946 We could have any of the following:
1947 { { => array (nested)
1948 { :/} => array
1949 { string , => string array
1950
1951 { string { => table
1952
1953 { string :/{ => table
1954 { string } => string
1955 */
1956
1957 token = peekToken(state, 0, NULL, &line, NULL,status);
1958
1959 if (U_FAILURE(*status))
1960 {
1961 return NULL;
1962 }
1963
1964 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1965 {
1966 resType = RT_ARRAY;
1967 }
1968 else if (token == TOK_STRING)
1969 {
1970 token = peekToken(state, 1, NULL, &line, NULL, status);
1971
1972 if (U_FAILURE(*status))
1973 {
1974 return NULL;
1975 }
1976
1977 switch (token)
1978 {
1979 case TOK_COMMA: resType = RT_ARRAY; break;
1980 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
1981 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
1982 case TOK_COLON: resType = RT_TABLE; break;
1983 default:
1984 *status = U_INVALID_FORMAT_ERROR;
1985 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1986 return NULL;
1987 }
1988 }
1989 else
1990 {
1991 *status = U_INVALID_FORMAT_ERROR;
1992 error(line, "Unexpected token after '{'");
1993 return NULL;
1994 }
1995
1996 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1997 } else if(resType == RT_TABLE_NO_FALLBACK) {
1998 *status = U_INVALID_FORMAT_ERROR;
1999 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2000 return NULL;
2001 }
2002
2003 /* We should now know what we need to parse next, so call the appropriate parser
2004 function and return. */
2005 parseFunction = gResourceTypes[resType].parseFunction;
2006 if (parseFunction != NULL) {
2007 return parseFunction(state, tag, startline, comment, status);
2008 }
2009 else {
2010 *status = U_INTERNAL_PROGRAM_ERROR;
2011 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2012 }
2013
2014 return NULL;
2015 }
2016
2017 /* parse the top-level resource */
2018 struct SRBRoot *
2019 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
2020 {
2021 struct UString *tokenValue;
2022 struct UString comment;
2023 uint32_t line;
2024 enum EResourceType bundleType;
2025 enum ETokenType token;
2026 ParseState state;
2027 uint32_t i;
2028 int encLength;
2029 char* enc;
2030 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2031 {
2032 ustr_init(&state.lookahead[i].value);
2033 ustr_init(&state.lookahead[i].comment);
2034 }
2035
2036 initLookahead(&state, buf, status);
2037
2038 state.inputdir = inputDir;
2039 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2040 state.outputdir = outputDir;
2041 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2042
2043 ustr_init(&comment);
2044 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2045
2046 state.bundle = bundle_open(&comment, FALSE, status);
2047
2048 if (state.bundle == NULL || U_FAILURE(*status))
2049 {
2050 return NULL;
2051 }
2052
2053
2054 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2055
2056 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2057 token = getToken(&state, NULL, NULL, &line, status);
2058 if(token==TOK_COLON) {
2059 *status=U_ZERO_ERROR;
2060 bundleType=parseResourceType(&state, status);
2061
2062 if(isTable(bundleType))
2063 {
2064 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2065 }
2066 else
2067 {
2068 *status=U_PARSE_ERROR;
2069 /* printf("asdsdweqdasdad\n"); */
2070
2071 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2072 }
2073 }
2074 else
2075 {
2076 /* not a colon */
2077 if(token==TOK_OPEN_BRACE)
2078 {
2079 *status=U_ZERO_ERROR;
2080 bundleType=RT_TABLE;
2081 }
2082 else
2083 {
2084 /* neither colon nor open brace */
2085 *status=U_PARSE_ERROR;
2086 bundleType=RT_UNKNOWN;
2087 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2088 }
2089 }
2090
2091 if (U_FAILURE(*status))
2092 {
2093 bundle_close(state.bundle, status);
2094 return NULL;
2095 }
2096
2097 if(bundleType==RT_TABLE_NO_FALLBACK) {
2098 /*
2099 * Parse a top-level table with the table(nofallback) declaration.
2100 * This is the same as a regular table, but also sets the
2101 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2102 */
2103 state.bundle->noFallback=TRUE;
2104 }
2105 /* top-level tables need not handle special table names like "collations" */
2106 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2107
2108 if(dependencyArray!=NULL){
2109 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2110 dependencyArray = NULL;
2111 }
2112 if (U_FAILURE(*status))
2113 {
2114 bundle_close(state.bundle, status);
2115 res_close(dependencyArray);
2116 return NULL;
2117 }
2118
2119 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2120 {
2121 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2122 if(isStrict()){
2123 *status = U_INVALID_FORMAT_ERROR;
2124 return NULL;
2125 }
2126 }
2127
2128 cleanupLookahead(&state);
2129 ustr_deinit(&comment);
2130 return state.bundle;
2131 }