]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/parse.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "genrb.h"
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
36 #include <stdio.h>
37
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD 3

/* Code points that the rule-file reader in this file treats specially. */
#define CR 0x000D             /* carriage return */
#define LF 0x000A             /* line feed */
#define SPACE 0x0020          /* space */
#define TAB 0x0009            /* horizontal tab */
#define ESCAPE 0x005C         /* backslash, introduces an escape sequence */
#define HASH 0x0023           /* '#', starts a comment in a rules file */
#define QUOTE 0x0027          /* apostrophe, toggles quoted text in a rules file */
#define ZERO 0x0030           /* digit zero */
#define STARTCOMMAND 0x005B   /* '[', opens a command in a rules file */
#define ENDCOMMAND 0x005D     /* ']', closes a command in a rules file */
#define OPENSQBRACKET 0x005B  /* '[', same code point as STARTCOMMAND */
#define CLOSESQBRACKET 0x005D /* ']', same code point as ENDCOMMAND */
53
/* One slot of the parser's circular lookahead buffer; every field is filled
   in by a single call to getNextToken. */
struct Lookahead
{
    enum ETokenType type;   /* token kind returned by getNextToken */
    struct UString value;   /* token text */
    struct UString comment; /* comment text handed back together with the token */
    uint32_t line;          /* line number reported for the token */
};
61
/* keep in sync with token defines in read.h */
/* Printable token names, indexed by enum ETokenType; used when building
   "expecting X, got Y" style diagnostics.
   NOTE(review): which enumerator the last entry belongs to depends on the
   order in read.h - confirm that "<end of line>" is the intended text. */
const char *tokenNames[TOK_TOKEN_COUNT] =
{
    "string", /* A string token, such as "MonthNames" */
    "'{'", /* An opening brace character */
    "'}'", /* A closing brace character */
    "','", /* A comma */
    "':'", /* A colon */

    "<end of file>", /* End of the file has been reached successfully */
    "<end of line>"
};
74
75 /* Just to store "TRUE" */
76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
77
/* Everything the parser needs while processing one input buffer. */
typedef struct {
    struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; /* circular token buffer */
    uint32_t lookaheadPosition;                    /* index of the current token's slot */
    UCHARBUF *buffer;                              /* input the tokens are read from */
    struct SRBRoot *bundle;                        /* bundle that new resources are created in */
    const char *inputdir;                          /* directory used to locate included files; may be NULL */
    uint32_t inputdirLength;                       /* presumably strlen(inputdir) - set outside this excerpt */
    const char *outputdir;                         /* directory used to check dependency files; may be NULL */
    uint32_t outputdirLength;                      /* presumably strlen(outputdir) - set outside this excerpt */
    UBool makeBinaryCollation;                     /* when TRUE, addCollation builds %%CollationBin */
} ParseState;
89
/* When TRUE, parseUCARules returns res_none() instead of reading the rules
   file, and addCollation releases the "Sequence" string instead of adding it
   to the table. It is never set within the part of the file shown here. */
static UBool gOmitCollationRules = FALSE;

/* Signature shared by the type-specific resource parsers (parseString,
   parseAlias, parseUCARules, ...). */
typedef struct SResource *
ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);

/* Forward declaration: the collation parsers call back into the general
   resource parser, which is defined later in the file. */
static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
96
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current position moves on to the next slot, and
   the slot vacated by the previous call is refilled with the next token from
   the reader by calling getNextToken.
   The token values are stored in the slots, which means that a token value
   does not survive the next call to getToken, i.e.

       struct UString *value;

       getToken(state, &value, NULL, NULL, status);
       getToken(state, NULL, NULL, NULL, status);   bad - value is now a different string
*/
110 static void
111 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
112 {
113 static uint32_t initTypeStrings = 0;
114 uint32_t i;
115
116 if (!initTypeStrings)
117 {
118 initTypeStrings = 1;
119 }
120
121 state->lookaheadPosition = 0;
122 state->buffer = buf;
123
124 resetLineNumber();
125
126 for (i = 0; i < MAX_LOOKAHEAD; i++)
127 {
128 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
129 if (U_FAILURE(*status))
130 {
131 return;
132 }
133 }
134
135 *status = U_ZERO_ERROR;
136 }
137
138 static void
139 cleanupLookahead(ParseState* state)
140 {
141 uint32_t i;
142 for (i = 0; i <= MAX_LOOKAHEAD; i++)
143 {
144 ustr_deinit(&state->lookahead[i].value);
145 ustr_deinit(&state->lookahead[i].comment);
146 }
147
148 }
149
150 static enum ETokenType
151 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
152 {
153 enum ETokenType result;
154 uint32_t i;
155
156 result = state->lookahead[state->lookaheadPosition].type;
157
158 if (tokenValue != NULL)
159 {
160 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
161 }
162
163 if (linenumber != NULL)
164 {
165 *linenumber = state->lookahead[state->lookaheadPosition].line;
166 }
167
168 if (comment != NULL)
169 {
170 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
171 }
172
173 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
174 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
175 ustr_setlen(&state->lookahead[i].comment, 0, status);
176 ustr_setlen(&state->lookahead[i].value, 0, status);
177 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
178
179 /* printf("getToken, returning %s\n", tokenNames[result]); */
180
181 return result;
182 }
183
184 static enum ETokenType
185 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
186 {
187 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
188
189 if (U_FAILURE(*status))
190 {
191 return TOK_ERROR;
192 }
193
194 if (lookaheadCount >= MAX_LOOKAHEAD)
195 {
196 *status = U_INTERNAL_PROGRAM_ERROR;
197 return TOK_ERROR;
198 }
199
200 if (tokenValue != NULL)
201 {
202 *tokenValue = &state->lookahead[i].value;
203 }
204
205 if (linenumber != NULL)
206 {
207 *linenumber = state->lookahead[i].line;
208 }
209
210 if(comment != NULL){
211 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
212 }
213
214 return state->lookahead[i].type;
215 }
216
217 static void
218 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
219 {
220 uint32_t line;
221
222 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
223
224 if (linenumber != NULL)
225 {
226 *linenumber = line;
227 }
228
229 if (U_FAILURE(*status))
230 {
231 return;
232 }
233
234 if (token != expectedToken)
235 {
236 *status = U_INVALID_FORMAT_ERROR;
237 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
238 }
239 else
240 {
241 *status = U_ZERO_ERROR;
242 }
243 }
244
245 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
246 {
247 struct UString *tokenValue;
248 char *result;
249 uint32_t count;
250
251 expect(state, TOK_STRING, &tokenValue, comment, line, status);
252
253 if (U_FAILURE(*status))
254 {
255 return NULL;
256 }
257
258 count = u_strlen(tokenValue->fChars);
259 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
260 *status = U_INVALID_FORMAT_ERROR;
261 error(*line, "invariant characters required for table keys, binary data, etc.");
262 return NULL;
263 }
264
265 result = static_cast<char *>(uprv_malloc(count+1));
266
267 if (result == NULL)
268 {
269 *status = U_MEMORY_ALLOCATION_ERROR;
270 return NULL;
271 }
272
273 u_UCharsToChars(tokenValue->fChars, result, count+1);
274 return result;
275 }
276
277 static struct SResource *
278 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
279 {
280 struct SResource *result = NULL;
281 struct UString *tokenValue;
282 FileStream *file = NULL;
283 char filename[256] = { '\0' };
284 char cs[128] = { '\0' };
285 uint32_t line;
286 UBool quoted = FALSE;
287 UCHARBUF *ucbuf=NULL;
288 UChar32 c = 0;
289 const char* cp = NULL;
290 UChar *pTarget = NULL;
291 UChar *target = NULL;
292 UChar *targetLimit = NULL;
293 int32_t size = 0;
294
295 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
296
297 if(isVerbose()){
298 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
299 }
300
301 if (U_FAILURE(*status))
302 {
303 return NULL;
304 }
305 /* make the filename including the directory */
306 if (state->inputdir != NULL)
307 {
308 uprv_strcat(filename, state->inputdir);
309
310 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
311 {
312 uprv_strcat(filename, U_FILE_SEP_STRING);
313 }
314 }
315
316 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
317
318 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
319
320 if (U_FAILURE(*status))
321 {
322 return NULL;
323 }
324 uprv_strcat(filename, cs);
325
326 if(gOmitCollationRules) {
327 return res_none();
328 }
329
330 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
331
332 if (U_FAILURE(*status)) {
333 error(line, "An error occured while opening the input file %s\n", filename);
334 return NULL;
335 }
336
337 /* We allocate more space than actually required
338 * since the actual size needed for storing UChars
339 * is not known in UTF-8 byte stream
340 */
341 size = ucbuf_size(ucbuf) + 1;
342 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
343 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
344 target = pTarget;
345 targetLimit = pTarget+size;
346
347 /* read the rules into the buffer */
348 while (target < targetLimit)
349 {
350 c = ucbuf_getc(ucbuf, status);
351 if(c == QUOTE) {
352 quoted = (UBool)!quoted;
353 }
354 /* weiv (06/26/2002): adding the following:
355 * - preserving spaces in commands [...]
356 * - # comments until the end of line
357 */
358 if (c == STARTCOMMAND && !quoted)
359 {
360 /* preserve commands
361 * closing bracket will be handled by the
362 * append at the end of the loop
363 */
364 while(c != ENDCOMMAND) {
365 U_APPEND_CHAR32_ONLY(c, target);
366 c = ucbuf_getc(ucbuf, status);
367 }
368 }
369 else if (c == HASH && !quoted) {
370 /* skip comments */
371 while(c != CR && c != LF) {
372 c = ucbuf_getc(ucbuf, status);
373 }
374 continue;
375 }
376 else if (c == ESCAPE)
377 {
378 c = unescape(ucbuf, status);
379
380 if (c == (UChar32)U_ERR)
381 {
382 uprv_free(pTarget);
383 T_FileStream_close(file);
384 return NULL;
385 }
386 }
387 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
388 {
389 /* ignore spaces carriage returns
390 * and line feed unless in the form \uXXXX
391 */
392 continue;
393 }
394
395 /* Append UChar * after dissembling if c > 0xffff*/
396 if (c != (UChar32)U_EOF)
397 {
398 U_APPEND_CHAR32_ONLY(c, target);
399 }
400 else
401 {
402 break;
403 }
404 }
405
406 /* terminate the string */
407 if(target < targetLimit){
408 *target = 0x0000;
409 }
410
411 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
412
413
414 ucbuf_close(ucbuf);
415 uprv_free(pTarget);
416 T_FileStream_close(file);
417
418 return result;
419 }
420
421 static struct SResource *
422 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
423 {
424 struct SResource *result = NULL;
425 struct UString *tokenValue;
426 FileStream *file = NULL;
427 char filename[256] = { '\0' };
428 char cs[128] = { '\0' };
429 uint32_t line;
430 UCHARBUF *ucbuf=NULL;
431 const char* cp = NULL;
432 UChar *pTarget = NULL;
433 const UChar *pSource = NULL;
434 int32_t size = 0;
435
436 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
437
438 if(isVerbose()){
439 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
440 }
441
442 if (U_FAILURE(*status))
443 {
444 return NULL;
445 }
446 /* make the filename including the directory */
447 if (state->inputdir != NULL)
448 {
449 uprv_strcat(filename, state->inputdir);
450
451 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
452 {
453 uprv_strcat(filename, U_FILE_SEP_STRING);
454 }
455 }
456
457 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
458
459 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
460
461 if (U_FAILURE(*status))
462 {
463 return NULL;
464 }
465 uprv_strcat(filename, cs);
466
467
468 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
469
470 if (U_FAILURE(*status)) {
471 error(line, "An error occured while opening the input file %s\n", filename);
472 return NULL;
473 }
474
475 /* We allocate more space than actually required
476 * since the actual size needed for storing UChars
477 * is not known in UTF-8 byte stream
478 */
479 pSource = ucbuf_getBuffer(ucbuf, &size, status);
480 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
481 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
482
483 #if !UCONFIG_NO_TRANSLITERATION
484 size = utrans_stripRules(pSource, size, pTarget, status);
485 #else
486 size = 0;
487 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
488 #endif
489 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
490
491 ucbuf_close(ucbuf);
492 uprv_free(pTarget);
493 T_FileStream_close(file);
494
495 return result;
496 }
497 static struct SResource* dependencyArray = NULL;
498
499 static struct SResource *
500 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
501 {
502 struct SResource *result = NULL;
503 struct SResource *elem = NULL;
504 struct UString *tokenValue;
505 uint32_t line;
506 char filename[256] = { '\0' };
507 char cs[128] = { '\0' };
508
509 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
510
511 if(isVerbose()){
512 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
513 }
514
515 if (U_FAILURE(*status))
516 {
517 return NULL;
518 }
519 /* make the filename including the directory */
520 if (state->outputdir != NULL)
521 {
522 uprv_strcat(filename, state->outputdir);
523
524 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
525 {
526 uprv_strcat(filename, U_FILE_SEP_STRING);
527 }
528 }
529
530 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
531
532 if (U_FAILURE(*status))
533 {
534 return NULL;
535 }
536 uprv_strcat(filename, cs);
537 if(!T_FileStream_file_exists(filename)){
538 if(isStrict()){
539 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
540 }else{
541 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
542 }
543 }
544 if(dependencyArray==NULL){
545 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
546 }
547 if(tag!=NULL){
548 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
549 }
550 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
551
552 array_add(dependencyArray, elem, status);
553
554 if (U_FAILURE(*status))
555 {
556 return NULL;
557 }
558 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
559 return result;
560 }
561 static struct SResource *
562 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
563 {
564 struct UString *tokenValue;
565 struct SResource *result = NULL;
566
567 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
568 {
569 return parseUCARules(tag, startline, status);
570 }*/
571 if(isVerbose()){
572 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
573 }
574 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
575
576 if (U_SUCCESS(*status))
577 {
578 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
579 doesn't survive expect either) */
580
581 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
582 if(U_SUCCESS(*status) && result) {
583 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
584
585 if (U_FAILURE(*status))
586 {
587 res_close(result);
588 return NULL;
589 }
590 }
591 }
592
593 return result;
594 }
595
596 static struct SResource *
597 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
598 {
599 struct UString *tokenValue;
600 struct SResource *result = NULL;
601
602 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
603
604 if(isVerbose()){
605 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
606 }
607
608 if (U_SUCCESS(*status))
609 {
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
612
613 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
614
615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
616
617 if (U_FAILURE(*status))
618 {
619 res_close(result);
620 return NULL;
621 }
622 }
623
624 return result;
625 }
626
/* Context handed to importFromDataFile through the collation builder:
   the directories to use when an imported locale file is parsed. */
typedef struct{
    const char* inputDir;  /* directory searched for the imported <locale>.txt; may be NULL */
    const char* outputDir; /* passed on unchanged to parse() */
} GenrbData;
631
632 static struct SResource* resLookup(struct SResource* res, const char* key){
633 struct SResource *current = NULL;
634 struct SResTable *list;
635 if (res == res_none()) {
636 return NULL;
637 }
638
639 list = &(res->u.fTable);
640
641 current = list->fFirst;
642 while (current != NULL) {
643 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
644 return current;
645 }
646 current = current->fNext;
647 }
648 return NULL;
649 }
650
651 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
652 struct SRBRoot *data = NULL;
653 UCHARBUF *ucbuf = NULL;
654 GenrbData* genrbdata = (GenrbData*) context;
655 int localeLength = strlen(locale);
656 char* filename = (char*)uprv_malloc(localeLength+5);
657 char *inputDirBuf = NULL;
658 char *openFileName = NULL;
659 const char* cp = "";
660 UChar* urules = NULL;
661 int32_t urulesLength = 0;
662 int32_t i = 0;
663 int32_t dirlen = 0;
664 int32_t filelen = 0;
665 struct SResource* root;
666 struct SResource* collations;
667 struct SResource* collation;
668 struct SResource* sequence;
669
670 memcpy(filename, locale, localeLength);
671 for(i = 0; i < localeLength; i++){
672 if(filename[i] == '-'){
673 filename[i] = '_';
674 }
675 }
676 filename[localeLength] = '.';
677 filename[localeLength+1] = 't';
678 filename[localeLength+2] = 'x';
679 filename[localeLength+3] = 't';
680 filename[localeLength+4] = 0;
681
682
683 if (status==NULL || U_FAILURE(*status)) {
684 return NULL;
685 }
686 if(filename==NULL){
687 *status=U_ILLEGAL_ARGUMENT_ERROR;
688 return NULL;
689 }else{
690 filelen = (int32_t)uprv_strlen(filename);
691 }
692 if(genrbdata->inputDir == NULL) {
693 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
694 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
695 openFileName[0] = '\0';
696 if (filenameBegin != NULL) {
697 /*
698 * When a filename ../../../data/root.txt is specified,
699 * we presume that the input directory is ../../../data
700 * This is very important when the resource file includes
701 * another file, like UCARules.txt or thaidict.brk.
702 */
703 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
704 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
705
706 /* test for NULL */
707 if(inputDirBuf == NULL) {
708 *status = U_MEMORY_ALLOCATION_ERROR;
709 goto finish;
710 }
711
712 inputDirBuf[filenameSize - 1] = 0;
713 genrbdata->inputDir = inputDirBuf;
714 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
715 }
716 }else{
717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
718
719 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
720 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
721
722 /* test for NULL */
723 if(openFileName == NULL) {
724 *status = U_MEMORY_ALLOCATION_ERROR;
725 goto finish;
726 }
727
728 openFileName[0] = '\0';
729 /*
730 * append the input dir to openFileName if the first char in
731 * filename is not file seperation char and the last char input directory is not '.'.
732 * This is to support :
733 * genrb -s. /home/icu/data
734 * genrb -s. icu/data
735 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant
737 * user should use
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir
739 */
740 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
741 uprv_strcpy(openFileName, genrbdata->inputDir);
742 openFileName[dirlen] = U_FILE_SEP_CHAR;
743 }
744 openFileName[dirlen + 1] = '\0';
745 } else {
746 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
747
748 /* test for NULL */
749 if(openFileName == NULL) {
750 *status = U_MEMORY_ALLOCATION_ERROR;
751 goto finish;
752 }
753
754 uprv_strcpy(openFileName, genrbdata->inputDir);
755
756 }
757 }
758 uprv_strcat(openFileName, filename);
759 /* printf("%s\n", openFileName); */
760 *status = U_ZERO_ERROR;
761 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
762
763 if(*status == U_FILE_ACCESS_ERROR) {
764
765 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
766 goto finish;
767 }
768 if (ucbuf == NULL || U_FAILURE(*status)) {
769 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
770 goto finish;
771 }
772
773 /* Parse the data into an SRBRoot */
774 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status);
775
776 root = data->fRoot;
777 collations = resLookup(root, "collations");
778 if (collations != NULL) {
779 collation = resLookup(collations, type);
780 if (collation != NULL) {
781 sequence = resLookup(collation, "Sequence");
782 if (sequence != NULL) {
783 urules = sequence->u.fString.fChars;
784 urulesLength = sequence->u.fString.fLength;
785 *pLength = urulesLength;
786 }
787 }
788 }
789
790 finish:
791 if (inputDirBuf != NULL) {
792 uprv_free(inputDirBuf);
793 }
794
795 if (openFileName != NULL) {
796 uprv_free(openFileName);
797 }
798
799 if(ucbuf) {
800 ucbuf_close(ucbuf);
801 }
802
803 return urules;
804 }
805
806 // Quick-and-dirty escaping function.
807 // Assumes that we are on an ASCII-based platform.
808 static void
809 escape(const UChar *s, char *buffer) {
810 int32_t length = u_strlen(s);
811 int32_t i = 0;
812 for (;;) {
813 UChar32 c;
814 U16_NEXT(s, i, length, c);
815 if (c == 0) {
816 *buffer = 0;
817 return;
818 } else if (0x20 <= c && c <= 0x7e) {
819 // printable ASCII
820 *buffer++ = (char)c; // assumes ASCII-based platform
821 } else {
822 buffer += sprintf(buffer, "\\u%04X", (int)c);
823 }
824 }
825 }
826
827 static struct SResource *
828 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
829 {
830 struct SResource *member = NULL;
831 struct UString *tokenValue;
832 struct UString comment;
833 enum ETokenType token;
834 char subtag[1024];
835 UVersionInfo version;
836 uint32_t line;
837 GenrbData genrbdata;
838 /* '{' . (name resource)* '}' */
839 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
840
841 for (;;)
842 {
843 ustr_init(&comment);
844 token = getToken(state, &tokenValue, &comment, &line, status);
845
846 if (token == TOK_CLOSE_BRACE)
847 {
848 return result;
849 }
850
851 if (token != TOK_STRING)
852 {
853 res_close(result);
854 *status = U_INVALID_FORMAT_ERROR;
855
856 if (token == TOK_EOF)
857 {
858 error(startline, "unterminated table");
859 }
860 else
861 {
862 error(line, "Unexpected token %s", tokenNames[token]);
863 }
864
865 return NULL;
866 }
867
868 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
869
870 if (U_FAILURE(*status))
871 {
872 res_close(result);
873 return NULL;
874 }
875
876 member = parseResource(state, subtag, NULL, status);
877
878 if (U_FAILURE(*status))
879 {
880 res_close(result);
881 return NULL;
882 }
883
884 if (uprv_strcmp(subtag, "Version") == 0)
885 {
886 char ver[40];
887 int32_t length = member->u.fString.fLength;
888
889 if (length >= (int32_t) sizeof(ver))
890 {
891 length = (int32_t) sizeof(ver) - 1;
892 }
893
894 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
895 u_versionFromString(version, ver);
896
897 table_add(result, member, line, status);
898
899 }
900 else if (uprv_strcmp(subtag, "Override") == 0)
901 {
902 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
903 table_add(result, member, line, status);
904
905 }
906 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
907 {
908 /* discard duplicate %%CollationBin if any*/
909 }
910 else if (uprv_strcmp(subtag, "Sequence") == 0)
911 {
912 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
913 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
914 #else
915 if(state->makeBinaryCollation) {
916
917 /* do the collation elements */
918 int32_t len = 0;
919 uint8_t *data = NULL;
920 UCollator *coll = NULL;
921 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
922 int32_t reorderCodeCount;
923 int32_t reorderCodeIndex;
924 UParseError parseError;
925
926 genrbdata.inputDir = state->inputdir;
927 genrbdata.outputDir = state->outputdir;
928
929 UErrorCode intStatus = U_ZERO_ERROR;
930 uprv_memset(&parseError, 0, sizeof(parseError));
931 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
932 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
933
934 if (U_SUCCESS(intStatus) && coll != NULL)
935 {
936 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
937 data = (uint8_t *)uprv_malloc(len);
938 intStatus = U_ZERO_ERROR;
939 len = ucol_cloneBinary(coll, data, len, &intStatus);
940 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
941
942 /* tailoring rules version */
943 /* This is wrong! */
944 /*coll->dataInfo.dataVersion[1] = version[0];*/
945 /* Copy tailoring version. Builder version already */
946 /* set in ucol_openRules */
947 ((UCATableHeader *)data)->version[1] = version[0];
948 ((UCATableHeader *)data)->version[2] = version[1];
949 ((UCATableHeader *)data)->version[3] = version[2];
950
951 if (U_SUCCESS(intStatus) && data != NULL)
952 {
953 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
954 table_add(result, collationBin, line, status);
955 uprv_free(data);
956
957 reorderCodeCount = ucol_getReorderCodes(
958 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
959 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
960 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
961 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
962 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
963 }
964 table_add(result, reorderCodeRes, line, status);
965 }
966 }
967 else
968 {
969 warning(line, "could not obtain rules from collator");
970 if(isStrict()){
971 *status = U_INVALID_FORMAT_ERROR;
972 return NULL;
973 }
974 }
975
976 ucol_close(coll);
977 }
978 else
979 {
980 if(intStatus == U_FILE_ACCESS_ERROR) {
981 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
982 *status = intStatus;
983 return NULL;
984 }
985 char preBuffer[100], postBuffer[100];
986 escape(parseError.preContext, preBuffer);
987 escape(parseError.postContext, postBuffer);
988 warning(line,
989 "%%%%CollationBin could not be constructed from CollationElements\n"
990 " check context, check that the FractionalUCA.txt UCA version "
991 "matches the current UCD version\n"
992 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
993 u_errorName(intStatus),
994 parseError.line,
995 parseError.offset,
996 preBuffer,
997 postBuffer);
998 if(isStrict()){
999 *status = intStatus;
1000 return NULL;
1001 }
1002 }
1003 } else {
1004 if(isVerbose()) {
1005 printf("Not building Collation binary\n");
1006 }
1007 }
1008 #endif
1009 /* in order to achieve smaller data files, we can direct genrb */
1010 /* to omit collation rules */
1011 if(gOmitCollationRules) {
1012 bundle_closeString(state->bundle, member);
1013 } else {
1014 table_add(result, member, line, status);
1015 }
1016 }
1017 if (U_FAILURE(*status))
1018 {
1019 res_close(result);
1020 return NULL;
1021 }
1022 }
1023
1024 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1025 *status = U_INTERNAL_PROGRAM_ERROR;
1026 return NULL;
1027 }
1028
1029 static struct SResource *
1030 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1031 {
1032 struct SResource *result = NULL;
1033 struct SResource *member = NULL;
1034 struct SResource *collationRes = NULL;
1035 struct UString *tokenValue;
1036 struct UString comment;
1037 enum ETokenType token;
1038 char subtag[1024], typeKeyword[1024];
1039 uint32_t line;
1040
1041 result = table_open(state->bundle, tag, NULL, status);
1042
1043 if (result == NULL || U_FAILURE(*status))
1044 {
1045 return NULL;
1046 }
1047 if(isVerbose()){
1048 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1049 }
1050 if(!newCollation) {
1051 return addCollation(state, result, startline, status);
1052 }
1053 else {
1054 for(;;) {
1055 ustr_init(&comment);
1056 token = getToken(state, &tokenValue, &comment, &line, status);
1057
1058 if (token == TOK_CLOSE_BRACE)
1059 {
1060 return result;
1061 }
1062
1063 if (token != TOK_STRING)
1064 {
1065 res_close(result);
1066 *status = U_INVALID_FORMAT_ERROR;
1067
1068 if (token == TOK_EOF)
1069 {
1070 error(startline, "unterminated table");
1071 }
1072 else
1073 {
1074 error(line, "Unexpected token %s", tokenNames[token]);
1075 }
1076
1077 return NULL;
1078 }
1079
1080 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1081
1082 if (U_FAILURE(*status))
1083 {
1084 res_close(result);
1085 return NULL;
1086 }
1087
1088 if (uprv_strcmp(subtag, "default") == 0)
1089 {
1090 member = parseResource(state, subtag, NULL, status);
1091
1092 if (U_FAILURE(*status))
1093 {
1094 res_close(result);
1095 return NULL;
1096 }
1097
1098 table_add(result, member, line, status);
1099 }
1100 else
1101 {
1102 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1103 /* this probably needs to be refactored or recursively use the parser */
1104 /* first we assume that our collation table won't have the explicit type */
1105 /* then, we cannot handle aliases */
1106 if(token == TOK_OPEN_BRACE) {
1107 token = getToken(state, &tokenValue, &comment, &line, status);
1108 collationRes = table_open(state->bundle, subtag, NULL, status);
1109 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1110 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1111 table_add(result, collationRes, startline, status);
1112 }
1113 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1114 /* we could have a table too */
1115 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1116 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1117 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1118 member = parseResource(state, subtag, NULL, status);
1119 if (U_FAILURE(*status))
1120 {
1121 res_close(result);
1122 return NULL;
1123 }
1124
1125 table_add(result, member, line, status);
1126 } else {
1127 res_close(result);
1128 *status = U_INVALID_FORMAT_ERROR;
1129 return NULL;
1130 }
1131 } else {
1132 res_close(result);
1133 *status = U_INVALID_FORMAT_ERROR;
1134 return NULL;
1135 }
1136 }
1137
1138 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1139
1140 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1141
1142 if (U_FAILURE(*status))
1143 {
1144 res_close(result);
1145 return NULL;
1146 }
1147 }
1148 }
1149 }
1150
1151 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1152 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1153 static struct SResource *
1154 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1155 {
1156 struct SResource *member = NULL;
1157 struct UString *tokenValue=NULL;
1158 struct UString comment;
1159 enum ETokenType token;
1160 char subtag[1024];
1161 uint32_t line;
1162 UBool readToken = FALSE;
1163
1164 /* '{' . (name resource)* '}' */
1165
1166 if(isVerbose()){
1167 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1168 }
1169 for (;;)
1170 {
1171 ustr_init(&comment);
1172 token = getToken(state, &tokenValue, &comment, &line, status);
1173
1174 if (token == TOK_CLOSE_BRACE)
1175 {
1176 if (!readToken) {
1177 warning(startline, "Encountered empty table");
1178 }
1179 return table;
1180 }
1181
1182 if (token != TOK_STRING)
1183 {
1184 *status = U_INVALID_FORMAT_ERROR;
1185
1186 if (token == TOK_EOF)
1187 {
1188 error(startline, "unterminated table");
1189 }
1190 else
1191 {
1192 error(line, "unexpected token %s", tokenNames[token]);
1193 }
1194
1195 return NULL;
1196 }
1197
1198 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1199 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1200 } else {
1201 *status = U_INVALID_FORMAT_ERROR;
1202 error(line, "invariant characters required for table keys");
1203 return NULL;
1204 }
1205
1206 if (U_FAILURE(*status))
1207 {
1208 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1209 return NULL;
1210 }
1211
1212 member = parseResource(state, subtag, &comment, status);
1213
1214 if (member == NULL || U_FAILURE(*status))
1215 {
1216 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1217 return NULL;
1218 }
1219
1220 table_add(table, member, line, status);
1221
1222 if (U_FAILURE(*status))
1223 {
1224 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1225 return NULL;
1226 }
1227 readToken = TRUE;
1228 ustr_deinit(&comment);
1229 }
1230
1231 /* not reached */
1232 /* A compiler warning will appear if all paths don't contain a return statement. */
1233 /* *status = U_INTERNAL_PROGRAM_ERROR;
1234 return NULL;*/
1235 }
1236
1237 static struct SResource *
1238 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1239 {
1240 struct SResource *result;
1241
1242 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1243 {
1244 return parseCollationElements(state, tag, startline, FALSE, status);
1245 }
1246 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1247 {
1248 return parseCollationElements(state, tag, startline, TRUE, status);
1249 }
1250 if(isVerbose()){
1251 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1252 }
1253
1254 result = table_open(state->bundle, tag, comment, status);
1255
1256 if (result == NULL || U_FAILURE(*status))
1257 {
1258 return NULL;
1259 }
1260 return realParseTable(state, result, tag, startline, status);
1261 }
1262
1263 static struct SResource *
1264 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1265 {
1266 struct SResource *result = NULL;
1267 struct SResource *member = NULL;
1268 struct UString *tokenValue;
1269 struct UString memberComments;
1270 enum ETokenType token;
1271 UBool readToken = FALSE;
1272
1273 result = array_open(state->bundle, tag, comment, status);
1274
1275 if (result == NULL || U_FAILURE(*status))
1276 {
1277 return NULL;
1278 }
1279 if(isVerbose()){
1280 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1281 }
1282
1283 ustr_init(&memberComments);
1284
1285 /* '{' . resource [','] '}' */
1286 for (;;)
1287 {
1288 /* reset length */
1289 ustr_setlen(&memberComments, 0, status);
1290
1291 /* check for end of array, but don't consume next token unless it really is the end */
1292 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1293
1294
1295 if (token == TOK_CLOSE_BRACE)
1296 {
1297 getToken(state, NULL, NULL, NULL, status);
1298 if (!readToken) {
1299 warning(startline, "Encountered empty array");
1300 }
1301 break;
1302 }
1303
1304 if (token == TOK_EOF)
1305 {
1306 res_close(result);
1307 *status = U_INVALID_FORMAT_ERROR;
1308 error(startline, "unterminated array");
1309 return NULL;
1310 }
1311
1312 /* string arrays are a special case */
1313 if (token == TOK_STRING)
1314 {
1315 getToken(state, &tokenValue, &memberComments, NULL, status);
1316 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1317 }
1318 else
1319 {
1320 member = parseResource(state, NULL, &memberComments, status);
1321 }
1322
1323 if (member == NULL || U_FAILURE(*status))
1324 {
1325 res_close(result);
1326 return NULL;
1327 }
1328
1329 array_add(result, member, status);
1330
1331 if (U_FAILURE(*status))
1332 {
1333 res_close(result);
1334 return NULL;
1335 }
1336
1337 /* eat optional comma if present */
1338 token = peekToken(state, 0, NULL, NULL, NULL, status);
1339
1340 if (token == TOK_COMMA)
1341 {
1342 getToken(state, NULL, NULL, NULL, status);
1343 }
1344
1345 if (U_FAILURE(*status))
1346 {
1347 res_close(result);
1348 return NULL;
1349 }
1350 readToken = TRUE;
1351 }
1352
1353 ustr_deinit(&memberComments);
1354 return result;
1355 }
1356
1357 static struct SResource *
1358 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1359 {
1360 struct SResource *result = NULL;
1361 enum ETokenType token;
1362 char *string;
1363 int32_t value;
1364 UBool readToken = FALSE;
1365 char *stopstring;
1366 uint32_t len;
1367 struct UString memberComments;
1368
1369 result = intvector_open(state->bundle, tag, comment, status);
1370
1371 if (result == NULL || U_FAILURE(*status))
1372 {
1373 return NULL;
1374 }
1375
1376 if(isVerbose()){
1377 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1378 }
1379 ustr_init(&memberComments);
1380 /* '{' . string [','] '}' */
1381 for (;;)
1382 {
1383 ustr_setlen(&memberComments, 0, status);
1384
1385 /* check for end of array, but don't consume next token unless it really is the end */
1386 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1387
1388 if (token == TOK_CLOSE_BRACE)
1389 {
1390 /* it's the end, consume the close brace */
1391 getToken(state, NULL, NULL, NULL, status);
1392 if (!readToken) {
1393 warning(startline, "Encountered empty int vector");
1394 }
1395 ustr_deinit(&memberComments);
1396 return result;
1397 }
1398
1399 string = getInvariantString(state, NULL, NULL, status);
1400
1401 if (U_FAILURE(*status))
1402 {
1403 res_close(result);
1404 return NULL;
1405 }
1406
1407 /* For handling illegal char in the Intvector */
1408 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1409 len=(uint32_t)(stopstring-string);
1410
1411 if(len==uprv_strlen(string))
1412 {
1413 intvector_add(result, value, status);
1414 uprv_free(string);
1415 token = peekToken(state, 0, NULL, NULL, NULL, status);
1416 }
1417 else
1418 {
1419 uprv_free(string);
1420 *status=U_INVALID_CHAR_FOUND;
1421 }
1422
1423 if (U_FAILURE(*status))
1424 {
1425 res_close(result);
1426 return NULL;
1427 }
1428
1429 /* the comma is optional (even though it is required to prevent the reader from concatenating
1430 consecutive entries) so that a missing comma on the last entry isn't an error */
1431 if (token == TOK_COMMA)
1432 {
1433 getToken(state, NULL, NULL, NULL, status);
1434 }
1435 readToken = TRUE;
1436 }
1437
1438 /* not reached */
1439 /* A compiler warning will appear if all paths don't contain a return statement. */
1440 /* intvector_close(result, status);
1441 *status = U_INTERNAL_PROGRAM_ERROR;
1442 return NULL;*/
1443 }
1444
1445 static struct SResource *
1446 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1447 {
1448 struct SResource *result = NULL;
1449 uint8_t *value;
1450 char *string;
1451 char toConv[3] = {'\0', '\0', '\0'};
1452 uint32_t count;
1453 uint32_t i;
1454 uint32_t line;
1455 char *stopstring;
1456 uint32_t len;
1457
1458 string = getInvariantString(state, &line, NULL, status);
1459
1460 if (string == NULL || U_FAILURE(*status))
1461 {
1462 return NULL;
1463 }
1464
1465 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1466
1467 if (U_FAILURE(*status))
1468 {
1469 uprv_free(string);
1470 return NULL;
1471 }
1472
1473 if(isVerbose()){
1474 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1475 }
1476
1477 count = (uint32_t)uprv_strlen(string);
1478 if (count > 0){
1479 if((count % 2)==0){
1480 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1481
1482 if (value == NULL)
1483 {
1484 uprv_free(string);
1485 *status = U_MEMORY_ALLOCATION_ERROR;
1486 return NULL;
1487 }
1488
1489 for (i = 0; i < count; i += 2)
1490 {
1491 toConv[0] = string[i];
1492 toConv[1] = string[i + 1];
1493
1494 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1495 len=(uint32_t)(stopstring-toConv);
1496
1497 if(len!=uprv_strlen(toConv))
1498 {
1499 uprv_free(string);
1500 *status=U_INVALID_CHAR_FOUND;
1501 return NULL;
1502 }
1503 }
1504
1505 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1506
1507 uprv_free(value);
1508 }
1509 else
1510 {
1511 *status = U_INVALID_CHAR_FOUND;
1512 uprv_free(string);
1513 error(line, "Encountered invalid binary string");
1514 return NULL;
1515 }
1516 }
1517 else
1518 {
1519 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1520 warning(startline, "Encountered empty binary tag");
1521 }
1522 uprv_free(string);
1523
1524 return result;
1525 }
1526
1527 static struct SResource *
1528 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1529 {
1530 struct SResource *result = NULL;
1531 int32_t value;
1532 char *string;
1533 char *stopstring;
1534 uint32_t len;
1535
1536 string = getInvariantString(state, NULL, NULL, status);
1537
1538 if (string == NULL || U_FAILURE(*status))
1539 {
1540 return NULL;
1541 }
1542
1543 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1544
1545 if (U_FAILURE(*status))
1546 {
1547 uprv_free(string);
1548 return NULL;
1549 }
1550
1551 if(isVerbose()){
1552 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1553 }
1554
1555 if (uprv_strlen(string) <= 0)
1556 {
1557 warning(startline, "Encountered empty integer. Default value is 0.");
1558 }
1559
1560 /* Allow integer support for hexdecimal, octal digit and decimal*/
1561 /* and handle illegal char in the integer*/
1562 value = uprv_strtoul(string, &stopstring, 0);
1563 len=(uint32_t)(stopstring-string);
1564 if(len==uprv_strlen(string))
1565 {
1566 result = int_open(state->bundle, tag, value, comment, status);
1567 }
1568 else
1569 {
1570 *status=U_INVALID_CHAR_FOUND;
1571 }
1572 uprv_free(string);
1573
1574 return result;
1575 }
1576
1577 static struct SResource *
1578 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1579 {
1580 struct SResource *result;
1581 FileStream *file;
1582 int32_t len;
1583 uint8_t *data;
1584 char *filename;
1585 uint32_t line;
1586 char *fullname = NULL;
1587 filename = getInvariantString(state, &line, NULL, status);
1588
1589 if (U_FAILURE(*status))
1590 {
1591 return NULL;
1592 }
1593
1594 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1595
1596 if (U_FAILURE(*status))
1597 {
1598 uprv_free(filename);
1599 return NULL;
1600 }
1601
1602 if(isVerbose()){
1603 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1604 }
1605
1606 /* Open the input file for reading */
1607 if (state->inputdir == NULL)
1608 {
1609 #if 1
1610 /*
1611 * Always save file file name, even if there's
1612 * no input directory specified. MIGHT BREAK SOMETHING
1613 */
1614 int32_t filenameLength = uprv_strlen(filename);
1615
1616 fullname = (char *) uprv_malloc(filenameLength + 1);
1617 uprv_strcpy(fullname, filename);
1618 #endif
1619
1620 file = T_FileStream_open(filename, "rb");
1621 }
1622 else
1623 {
1624
1625 int32_t count = (int32_t)uprv_strlen(filename);
1626
1627 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1628 {
1629 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1630
1631 /* test for NULL */
1632 if(fullname == NULL)
1633 {
1634 *status = U_MEMORY_ALLOCATION_ERROR;
1635 return NULL;
1636 }
1637
1638 uprv_strcpy(fullname, state->inputdir);
1639
1640 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1641 fullname[state->inputdirLength + 1] = '\0';
1642
1643 uprv_strcat(fullname, filename);
1644 }
1645 else
1646 {
1647 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1648
1649 /* test for NULL */
1650 if(fullname == NULL)
1651 {
1652 *status = U_MEMORY_ALLOCATION_ERROR;
1653 return NULL;
1654 }
1655
1656 uprv_strcpy(fullname, state->inputdir);
1657 uprv_strcat(fullname, filename);
1658 }
1659
1660 file = T_FileStream_open(fullname, "rb");
1661
1662 }
1663
1664 if (file == NULL)
1665 {
1666 error(line, "couldn't open input file %s", filename);
1667 *status = U_FILE_ACCESS_ERROR;
1668 return NULL;
1669 }
1670
1671 len = T_FileStream_size(file);
1672 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1673 /* test for NULL */
1674 if(data == NULL)
1675 {
1676 *status = U_MEMORY_ALLOCATION_ERROR;
1677 T_FileStream_close (file);
1678 return NULL;
1679 }
1680
1681 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1682 T_FileStream_close (file);
1683
1684 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1685
1686 uprv_free(data);
1687 uprv_free(filename);
1688 uprv_free(fullname);
1689
1690 return result;
1691 }
1692
1693 static struct SResource *
1694 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1695 {
1696 struct SResource *result;
1697 int32_t len=0;
1698 char *filename;
1699 uint32_t line;
1700 UChar *pTarget = NULL;
1701
1702 UCHARBUF *ucbuf;
1703 char *fullname = NULL;
1704 int32_t count = 0;
1705 const char* cp = NULL;
1706 const UChar* uBuffer = NULL;
1707
1708 filename = getInvariantString(state, &line, NULL, status);
1709 count = (int32_t)uprv_strlen(filename);
1710
1711 if (U_FAILURE(*status))
1712 {
1713 return NULL;
1714 }
1715
1716 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1717
1718 if (U_FAILURE(*status))
1719 {
1720 uprv_free(filename);
1721 return NULL;
1722 }
1723
1724 if(isVerbose()){
1725 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1726 }
1727
1728 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1729 /* test for NULL */
1730 if(fullname == NULL)
1731 {
1732 *status = U_MEMORY_ALLOCATION_ERROR;
1733 uprv_free(filename);
1734 return NULL;
1735 }
1736
1737 if(state->inputdir!=NULL){
1738 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1739 {
1740
1741 uprv_strcpy(fullname, state->inputdir);
1742
1743 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1744 fullname[state->inputdirLength + 1] = '\0';
1745
1746 uprv_strcat(fullname, filename);
1747 }
1748 else
1749 {
1750 uprv_strcpy(fullname, state->inputdir);
1751 uprv_strcat(fullname, filename);
1752 }
1753 }else{
1754 uprv_strcpy(fullname,filename);
1755 }
1756
1757 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1758
1759 if (U_FAILURE(*status)) {
1760 error(line, "couldn't open input file %s\n", filename);
1761 return NULL;
1762 }
1763
1764 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1765 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1766
1767 ucbuf_close(ucbuf);
1768
1769 uprv_free(pTarget);
1770
1771 uprv_free(filename);
1772 uprv_free(fullname);
1773
1774 return result;
1775 }
1776
1777
1778
1779
1780
1781 U_STRING_DECL(k_type_string, "string", 6);
1782 U_STRING_DECL(k_type_binary, "binary", 6);
1783 U_STRING_DECL(k_type_bin, "bin", 3);
1784 U_STRING_DECL(k_type_table, "table", 5);
1785 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1786 U_STRING_DECL(k_type_int, "int", 3);
1787 U_STRING_DECL(k_type_integer, "integer", 7);
1788 U_STRING_DECL(k_type_array, "array", 5);
1789 U_STRING_DECL(k_type_alias, "alias", 5);
1790 U_STRING_DECL(k_type_intvector, "intvector", 9);
1791 U_STRING_DECL(k_type_import, "import", 6);
1792 U_STRING_DECL(k_type_include, "include", 7);
1793
1794 /* Various non-standard processing plugins that create one or more special resources. */
1795 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1796 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1797 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1798 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1799
/*
 * Resource types the parser distinguishes. The values are used as indexes
 * into gResourceTypes[], so the order of the enumerators and of the table
 * rows must stay identical.
 */
typedef enum EResourceType
{
    RT_UNKNOWN = 0,             /* no explicit type given / not recognised */
    RT_STRING,                  /* "string" */
    RT_BINARY,                  /* "binary", alias "bin" */
    RT_TABLE,                   /* "table" */
    RT_TABLE_NO_FALLBACK,       /* "table(nofallback)" */
    RT_INTEGER,                 /* "integer", alias "int" */
    RT_ARRAY,                   /* "array" */
    RT_ALIAS,                   /* "alias" */
    RT_INTVECTOR,               /* "intvector" */
    RT_IMPORT,                  /* "import" */
    RT_INCLUDE,                 /* "include" */
    RT_PROCESS_UCA_RULES,       /* "process(uca_rules)" */
    RT_PROCESS_COLLATION,       /* "process(collation)" */
    RT_PROCESS_TRANSLITERATOR,  /* "process(transliterator)" */
    RT_PROCESS_DEPENDENCY,      /* "process(dependency)" */
    RT_RESERVED                 /* end marker, one past the last real type */
} EResourceType;
1819
1820 static struct {
1821 const char *nameChars; /* only used for debugging */
1822 const UChar *nameUChars;
1823 ParseResourceFunction *parseFunction;
1824 } gResourceTypes[] = {
1825 {"Unknown", NULL, NULL},
1826 {"string", k_type_string, parseString},
1827 {"binary", k_type_binary, parseBinary},
1828 {"table", k_type_table, parseTable},
1829 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1830 {"integer", k_type_integer, parseInteger},
1831 {"array", k_type_array, parseArray},
1832 {"alias", k_type_alias, parseAlias},
1833 {"intvector", k_type_intvector, parseIntVector},
1834 {"import", k_type_import, parseImport},
1835 {"include", k_type_include, parseInclude},
1836 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1837 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1838 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1839 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1840 {"reserved", NULL, NULL}
1841 };
1842
1843 void initParser(UBool omitCollationRules)
1844 {
1845 U_STRING_INIT(k_type_string, "string", 6);
1846 U_STRING_INIT(k_type_binary, "binary", 6);
1847 U_STRING_INIT(k_type_bin, "bin", 3);
1848 U_STRING_INIT(k_type_table, "table", 5);
1849 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1850 U_STRING_INIT(k_type_int, "int", 3);
1851 U_STRING_INIT(k_type_integer, "integer", 7);
1852 U_STRING_INIT(k_type_array, "array", 5);
1853 U_STRING_INIT(k_type_alias, "alias", 5);
1854 U_STRING_INIT(k_type_intvector, "intvector", 9);
1855 U_STRING_INIT(k_type_import, "import", 6);
1856 U_STRING_INIT(k_type_include, "include", 7);
1857
1858 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1859 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1860 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1861 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1862
1863 gOmitCollationRules = omitCollationRules;
1864 }
1865
1866 static inline UBool isTable(enum EResourceType type) {
1867 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1868 }
1869
1870 static enum EResourceType
1871 parseResourceType(ParseState* state, UErrorCode *status)
1872 {
1873 struct UString *tokenValue;
1874 struct UString comment;
1875 enum EResourceType result = RT_UNKNOWN;
1876 uint32_t line=0;
1877 ustr_init(&comment);
1878 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1879
1880 if (U_FAILURE(*status))
1881 {
1882 return RT_UNKNOWN;
1883 }
1884
1885 *status = U_ZERO_ERROR;
1886
1887 /* Search for normal types */
1888 result=RT_UNKNOWN;
1889 while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
1890 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1891 break;
1892 }
1893 }
1894 /* Now search for the aliases */
1895 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1896 result = RT_INTEGER;
1897 }
1898 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1899 result = RT_BINARY;
1900 }
1901 else if (result == RT_RESERVED) {
1902 char tokenBuffer[1024];
1903 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1904 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1905 *status = U_INVALID_FORMAT_ERROR;
1906 error(line, "unknown resource type '%s'", tokenBuffer);
1907 }
1908
1909 return result;
1910 }
1911
1912 /* parse a non-top-level resource */
1913 static struct SResource *
1914 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1915 {
1916 enum ETokenType token;
1917 enum EResourceType resType = RT_UNKNOWN;
1918 ParseResourceFunction *parseFunction = NULL;
1919 struct UString *tokenValue;
1920 uint32_t startline;
1921 uint32_t line;
1922
1923
1924 token = getToken(state, &tokenValue, NULL, &startline, status);
1925
1926 if(isVerbose()){
1927 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1928 }
1929
1930 /* name . [ ':' type ] '{' resource '}' */
1931 /* This function parses from the colon onwards. If the colon is present, parse the
1932 type then try to parse a resource of that type. If there is no explicit type,
1933 work it out using the lookahead tokens. */
1934 switch (token)
1935 {
1936 case TOK_EOF:
1937 *status = U_INVALID_FORMAT_ERROR;
1938 error(startline, "Unexpected EOF encountered");
1939 return NULL;
1940
1941 case TOK_ERROR:
1942 *status = U_INVALID_FORMAT_ERROR;
1943 return NULL;
1944
1945 case TOK_COLON:
1946 resType = parseResourceType(state, status);
1947 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1948
1949 if (U_FAILURE(*status))
1950 {
1951 return NULL;
1952 }
1953
1954 break;
1955
1956 case TOK_OPEN_BRACE:
1957 break;
1958
1959 default:
1960 *status = U_INVALID_FORMAT_ERROR;
1961 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1962 return NULL;
1963 }
1964
1965
1966 if (resType == RT_UNKNOWN)
1967 {
1968 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1969 We could have any of the following:
1970 { { => array (nested)
1971 { :/} => array
1972 { string , => string array
1973
1974 { string { => table
1975
1976 { string :/{ => table
1977 { string } => string
1978 */
1979
1980 token = peekToken(state, 0, NULL, &line, NULL,status);
1981
1982 if (U_FAILURE(*status))
1983 {
1984 return NULL;
1985 }
1986
1987 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1988 {
1989 resType = RT_ARRAY;
1990 }
1991 else if (token == TOK_STRING)
1992 {
1993 token = peekToken(state, 1, NULL, &line, NULL, status);
1994
1995 if (U_FAILURE(*status))
1996 {
1997 return NULL;
1998 }
1999
2000 switch (token)
2001 {
2002 case TOK_COMMA: resType = RT_ARRAY; break;
2003 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
2004 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
2005 case TOK_COLON: resType = RT_TABLE; break;
2006 default:
2007 *status = U_INVALID_FORMAT_ERROR;
2008 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2009 return NULL;
2010 }
2011 }
2012 else
2013 {
2014 *status = U_INVALID_FORMAT_ERROR;
2015 error(line, "Unexpected token after '{'");
2016 return NULL;
2017 }
2018
2019 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2020 } else if(resType == RT_TABLE_NO_FALLBACK) {
2021 *status = U_INVALID_FORMAT_ERROR;
2022 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2023 return NULL;
2024 }
2025
2026
2027 /* We should now know what we need to parse next, so call the appropriate parser
2028 function and return. */
2029 parseFunction = gResourceTypes[resType].parseFunction;
2030 if (parseFunction != NULL) {
2031 return parseFunction(state, tag, startline, comment, status);
2032 }
2033 else {
2034 *status = U_INTERNAL_PROGRAM_ERROR;
2035 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2036 }
2037
2038 return NULL;
2039 }
2040
2041 /* parse the top-level resource */
2042 struct SRBRoot *
2043 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation,
2044 UErrorCode *status)
2045 {
2046 struct UString *tokenValue;
2047 struct UString comment;
2048 uint32_t line;
2049 enum EResourceType bundleType;
2050 enum ETokenType token;
2051 ParseState state;
2052 uint32_t i;
2053
2054
2055 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2056 {
2057 ustr_init(&state.lookahead[i].value);
2058 ustr_init(&state.lookahead[i].comment);
2059 }
2060
2061 initLookahead(&state, buf, status);
2062
2063 state.inputdir = inputDir;
2064 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2065 state.outputdir = outputDir;
2066 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2067 state.makeBinaryCollation = makeBinaryCollation;
2068
2069 ustr_init(&comment);
2070 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2071
2072 state.bundle = bundle_open(&comment, FALSE, status);
2073
2074 if (state.bundle == NULL || U_FAILURE(*status))
2075 {
2076 return NULL;
2077 }
2078
2079
2080 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2081
2082 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2083 token = getToken(&state, NULL, NULL, &line, status);
2084 if(token==TOK_COLON) {
2085 *status=U_ZERO_ERROR;
2086 bundleType=parseResourceType(&state, status);
2087
2088 if(isTable(bundleType))
2089 {
2090 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2091 }
2092 else
2093 {
2094 *status=U_PARSE_ERROR;
2095 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2096 }
2097 }
2098 else
2099 {
2100 /* not a colon */
2101 if(token==TOK_OPEN_BRACE)
2102 {
2103 *status=U_ZERO_ERROR;
2104 bundleType=RT_TABLE;
2105 }
2106 else
2107 {
2108 /* neither colon nor open brace */
2109 *status=U_PARSE_ERROR;
2110 bundleType=RT_UNKNOWN;
2111 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2112 }
2113 }
2114
2115 if (U_FAILURE(*status))
2116 {
2117 bundle_close(state.bundle, status);
2118 return NULL;
2119 }
2120
2121 if(bundleType==RT_TABLE_NO_FALLBACK) {
2122 /*
2123 * Parse a top-level table with the table(nofallback) declaration.
2124 * This is the same as a regular table, but also sets the
2125 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2126 */
2127 state.bundle->noFallback=TRUE;
2128 }
2129 /* top-level tables need not handle special table names like "collations" */
2130 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2131 if(dependencyArray!=NULL){
2132 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2133 dependencyArray = NULL;
2134 }
2135 if (U_FAILURE(*status))
2136 {
2137 bundle_close(state.bundle, status);
2138 res_close(dependencyArray);
2139 return NULL;
2140 }
2141
2142 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2143 {
2144 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2145 if(isStrict()){
2146 *status = U_INVALID_FORMAT_ERROR;
2147 return NULL;
2148 }
2149 }
2150
2151 cleanupLookahead(&state);
2152 ustr_deinit(&comment);
2153 return state.bundle;
2154 }