]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/parse.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 # define UNISTR_FROM_CHAR_EXPLICIT explicit
24 #endif
25
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 # define UNISTR_FROM_STRING_EXPLICIT explicit
29 #endif
30
31 #include <assert.h>
32 #include "parse.h"
33 #include "errmsg.h"
34 #include "uhash.h"
35 #include "cmemory.h"
36 #include "cstring.h"
37 #include "uinvchar.h"
38 #include "read.h"
39 #include "ustr.h"
40 #include "reslist.h"
41 #include "rbt_pars.h"
42 #include "genrb.h"
43 #include "unicode/stringpiece.h"
44 #include "unicode/unistr.h"
45 #include "unicode/ustring.h"
46 #include "unicode/uscript.h"
47 #include "unicode/utf16.h"
48 #include "unicode/putil.h"
49 #include "charstr.h"
50 #include "collationbuilder.h"
51 #include "collationdata.h"
52 #include "collationdatareader.h"
53 #include "collationdatawriter.h"
54 #include "collationfastlatinbuilder.h"
55 #include "collationinfo.h"
56 #include "collationroot.h"
57 #include "collationruleparser.h"
58 #include "collationtailoring.h"
59 #include <stdio.h>
60
61 /* Number of tokens to read ahead of the current stream position */
62 #define MAX_LOOKAHEAD 3
63
64 #define CR 0x000D
65 #define LF 0x000A
66 #define SPACE 0x0020
67 #define TAB 0x0009
68 #define ESCAPE 0x005C
69 #define HASH 0x0023
70 #define QUOTE 0x0027
71 #define ZERO 0x0030
72 #define STARTCOMMAND 0x005B
73 #define ENDCOMMAND 0x005D
74 #define OPENSQBRACKET 0x005B
75 #define CLOSESQBRACKET 0x005D
76
77 using icu::CharString;
78 using icu::LocalMemory;
79 using icu::LocalPointer;
80 using icu::LocalUCHARBUFPointer;
81 using icu::StringPiece;
82 using icu::UnicodeString;
83
84 struct Lookahead
85 {
86 enum ETokenType type;
87 struct UString value;
88 struct UString comment;
89 uint32_t line;
90 };
91
92 /* keep in sync with token defines in read.h */
93 const char *tokenNames[TOK_TOKEN_COUNT] =
94 {
95 "string", /* A string token, such as "MonthNames" */
96 "'{'", /* An opening brace character */
97 "'}'", /* A closing brace character */
98 "','", /* A comma */
99 "':'", /* A colon */
100
101 "<end of file>", /* End of the file has been reached successfully */
102 "<end of line>"
103 };
104
105 /* Just to store "TRUE" */
106 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
107
108 typedef struct {
109 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
110 uint32_t lookaheadPosition;
111 UCHARBUF *buffer;
112 struct SRBRoot *bundle;
113 const char *inputdir;
114 uint32_t inputdirLength;
115 const char *outputdir;
116 uint32_t outputdirLength;
117 const char *filename;
118 UBool makeBinaryCollation;
119 UBool omitCollationRules;
120 } ParseState;
121
122 typedef struct SResource *
123 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
124
125 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
126
127 /* The nature of the lookahead buffer:
128 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
129 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
130 When getToken is called, the current pointer is moved to the next slot and the
131 old slot is filled with the next token from the reader by calling getNextToken.
132 The token values are stored in the slot, which means that token values don't
133 survive a call to getToken, ie.
134
135 UString *value;
136
137 getToken(&value, NULL, status);
138 getToken(NULL, NULL, status); bad - value is now a different string
139 */
140 static void
141 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
142 {
143 static uint32_t initTypeStrings = 0;
144 uint32_t i;
145
146 if (!initTypeStrings)
147 {
148 initTypeStrings = 1;
149 }
150
151 state->lookaheadPosition = 0;
152 state->buffer = buf;
153
154 resetLineNumber();
155
156 for (i = 0; i < MAX_LOOKAHEAD; i++)
157 {
158 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
159 if (U_FAILURE(*status))
160 {
161 return;
162 }
163 }
164
165 *status = U_ZERO_ERROR;
166 }
167
168 static void
169 cleanupLookahead(ParseState* state)
170 {
171 uint32_t i;
172 for (i = 0; i <= MAX_LOOKAHEAD; i++)
173 {
174 ustr_deinit(&state->lookahead[i].value);
175 ustr_deinit(&state->lookahead[i].comment);
176 }
177
178 }
179
180 static enum ETokenType
181 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
182 {
183 enum ETokenType result;
184 uint32_t i;
185
186 result = state->lookahead[state->lookaheadPosition].type;
187
188 if (tokenValue != NULL)
189 {
190 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
191 }
192
193 if (linenumber != NULL)
194 {
195 *linenumber = state->lookahead[state->lookaheadPosition].line;
196 }
197
198 if (comment != NULL)
199 {
200 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
201 }
202
203 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
204 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
205 ustr_setlen(&state->lookahead[i].comment, 0, status);
206 ustr_setlen(&state->lookahead[i].value, 0, status);
207 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
208
209 /* printf("getToken, returning %s\n", tokenNames[result]); */
210
211 return result;
212 }
213
214 static enum ETokenType
215 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
216 {
217 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
218
219 if (U_FAILURE(*status))
220 {
221 return TOK_ERROR;
222 }
223
224 if (lookaheadCount >= MAX_LOOKAHEAD)
225 {
226 *status = U_INTERNAL_PROGRAM_ERROR;
227 return TOK_ERROR;
228 }
229
230 if (tokenValue != NULL)
231 {
232 *tokenValue = &state->lookahead[i].value;
233 }
234
235 if (linenumber != NULL)
236 {
237 *linenumber = state->lookahead[i].line;
238 }
239
240 if(comment != NULL){
241 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
242 }
243
244 return state->lookahead[i].type;
245 }
246
247 static void
248 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
249 {
250 uint32_t line;
251
252 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
253
254 if (linenumber != NULL)
255 {
256 *linenumber = line;
257 }
258
259 if (U_FAILURE(*status))
260 {
261 return;
262 }
263
264 if (token != expectedToken)
265 {
266 *status = U_INVALID_FORMAT_ERROR;
267 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
268 }
269 else
270 {
271 *status = U_ZERO_ERROR;
272 }
273 }
274
275 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
276 {
277 struct UString *tokenValue;
278 char *result;
279 uint32_t count;
280
281 expect(state, TOK_STRING, &tokenValue, comment, line, status);
282
283 if (U_FAILURE(*status))
284 {
285 return NULL;
286 }
287
288 count = u_strlen(tokenValue->fChars);
289 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
290 *status = U_INVALID_FORMAT_ERROR;
291 error(*line, "invariant characters required for table keys, binary data, etc.");
292 return NULL;
293 }
294
295 result = static_cast<char *>(uprv_malloc(count+1));
296
297 if (result == NULL)
298 {
299 *status = U_MEMORY_ALLOCATION_ERROR;
300 return NULL;
301 }
302
303 u_UCharsToChars(tokenValue->fChars, result, count+1);
304 return result;
305 }
306
307 static struct SResource *
308 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
309 {
310 struct SResource *result = NULL;
311 struct UString *tokenValue;
312 FileStream *file = NULL;
313 char filename[256] = { '\0' };
314 char cs[128] = { '\0' };
315 uint32_t line;
316 UBool quoted = FALSE;
317 UCHARBUF *ucbuf=NULL;
318 UChar32 c = 0;
319 const char* cp = NULL;
320 UChar *pTarget = NULL;
321 UChar *target = NULL;
322 UChar *targetLimit = NULL;
323 int32_t size = 0;
324
325 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
326
327 if(isVerbose()){
328 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
329 }
330
331 if (U_FAILURE(*status))
332 {
333 return NULL;
334 }
335 /* make the filename including the directory */
336 if (state->inputdir != NULL)
337 {
338 uprv_strcat(filename, state->inputdir);
339
340 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
341 {
342 uprv_strcat(filename, U_FILE_SEP_STRING);
343 }
344 }
345
346 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
347
348 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
349
350 if (U_FAILURE(*status))
351 {
352 return NULL;
353 }
354 uprv_strcat(filename, cs);
355
356 if(state->omitCollationRules) {
357 return res_none();
358 }
359
360 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
361
362 if (U_FAILURE(*status)) {
363 error(line, "An error occured while opening the input file %s\n", filename);
364 return NULL;
365 }
366
367 /* We allocate more space than actually required
368 * since the actual size needed for storing UChars
369 * is not known in UTF-8 byte stream
370 */
371 size = ucbuf_size(ucbuf) + 1;
372 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
373 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
374 target = pTarget;
375 targetLimit = pTarget+size;
376
377 /* read the rules into the buffer */
378 while (target < targetLimit)
379 {
380 c = ucbuf_getc(ucbuf, status);
381 if(c == QUOTE) {
382 quoted = (UBool)!quoted;
383 }
384 /* weiv (06/26/2002): adding the following:
385 * - preserving spaces in commands [...]
386 * - # comments until the end of line
387 */
388 if (c == STARTCOMMAND && !quoted)
389 {
390 /* preserve commands
391 * closing bracket will be handled by the
392 * append at the end of the loop
393 */
394 while(c != ENDCOMMAND) {
395 U_APPEND_CHAR32_ONLY(c, target);
396 c = ucbuf_getc(ucbuf, status);
397 }
398 }
399 else if (c == HASH && !quoted) {
400 /* skip comments */
401 while(c != CR && c != LF) {
402 c = ucbuf_getc(ucbuf, status);
403 }
404 continue;
405 }
406 else if (c == ESCAPE)
407 {
408 c = unescape(ucbuf, status);
409
410 if (c == (UChar32)U_ERR)
411 {
412 uprv_free(pTarget);
413 T_FileStream_close(file);
414 return NULL;
415 }
416 }
417 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
418 {
419 /* ignore spaces carriage returns
420 * and line feed unless in the form \uXXXX
421 */
422 continue;
423 }
424
425 /* Append UChar * after dissembling if c > 0xffff*/
426 if (c != (UChar32)U_EOF)
427 {
428 U_APPEND_CHAR32_ONLY(c, target);
429 }
430 else
431 {
432 break;
433 }
434 }
435
436 /* terminate the string */
437 if(target < targetLimit){
438 *target = 0x0000;
439 }
440
441 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
442
443
444 ucbuf_close(ucbuf);
445 uprv_free(pTarget);
446 T_FileStream_close(file);
447
448 return result;
449 }
450
451 static struct SResource *
452 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
453 {
454 struct SResource *result = NULL;
455 struct UString *tokenValue;
456 FileStream *file = NULL;
457 char filename[256] = { '\0' };
458 char cs[128] = { '\0' };
459 uint32_t line;
460 UCHARBUF *ucbuf=NULL;
461 const char* cp = NULL;
462 UChar *pTarget = NULL;
463 const UChar *pSource = NULL;
464 int32_t size = 0;
465
466 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
467
468 if(isVerbose()){
469 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
470 }
471
472 if (U_FAILURE(*status))
473 {
474 return NULL;
475 }
476 /* make the filename including the directory */
477 if (state->inputdir != NULL)
478 {
479 uprv_strcat(filename, state->inputdir);
480
481 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
482 {
483 uprv_strcat(filename, U_FILE_SEP_STRING);
484 }
485 }
486
487 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
488
489 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
490
491 if (U_FAILURE(*status))
492 {
493 return NULL;
494 }
495 uprv_strcat(filename, cs);
496
497
498 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
499
500 if (U_FAILURE(*status)) {
501 error(line, "An error occured while opening the input file %s\n", filename);
502 return NULL;
503 }
504
505 /* We allocate more space than actually required
506 * since the actual size needed for storing UChars
507 * is not known in UTF-8 byte stream
508 */
509 pSource = ucbuf_getBuffer(ucbuf, &size, status);
510 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
511 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
512
513 #if !UCONFIG_NO_TRANSLITERATION
514 size = utrans_stripRules(pSource, size, pTarget, status);
515 #else
516 size = 0;
517 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
518 #endif
519 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
520
521 ucbuf_close(ucbuf);
522 uprv_free(pTarget);
523 T_FileStream_close(file);
524
525 return result;
526 }
527 static ArrayResource* dependencyArray = NULL;
528
529 static struct SResource *
530 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
531 {
532 struct SResource *result = NULL;
533 struct SResource *elem = NULL;
534 struct UString *tokenValue;
535 uint32_t line;
536 char filename[256] = { '\0' };
537 char cs[128] = { '\0' };
538
539 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
540
541 if(isVerbose()){
542 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
543 }
544
545 if (U_FAILURE(*status))
546 {
547 return NULL;
548 }
549 /* make the filename including the directory */
550 if (state->outputdir != NULL)
551 {
552 uprv_strcat(filename, state->outputdir);
553
554 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
555 {
556 uprv_strcat(filename, U_FILE_SEP_STRING);
557 }
558 }
559
560 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
561
562 if (U_FAILURE(*status))
563 {
564 return NULL;
565 }
566 uprv_strcat(filename, cs);
567 if(!T_FileStream_file_exists(filename)){
568 if(isStrict()){
569 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
570 }else{
571 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
572 }
573 }
574 if(dependencyArray==NULL){
575 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
576 }
577 if(tag!=NULL){
578 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
579 }
580 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
581
582 dependencyArray->add(elem);
583
584 if (U_FAILURE(*status))
585 {
586 return NULL;
587 }
588 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
589 return result;
590 }
591 static struct SResource *
592 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
593 {
594 struct UString *tokenValue;
595 struct SResource *result = NULL;
596
597 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
598 {
599 return parseUCARules(tag, startline, status);
600 }*/
601 if(isVerbose()){
602 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
603 }
604 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
605
606 if (U_SUCCESS(*status))
607 {
608 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
609 doesn't survive expect either) */
610
611 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
612 if(U_SUCCESS(*status) && result) {
613 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
614
615 if (U_FAILURE(*status))
616 {
617 res_close(result);
618 return NULL;
619 }
620 }
621 }
622
623 return result;
624 }
625
626 static struct SResource *
627 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
628 {
629 struct UString *tokenValue;
630 struct SResource *result = NULL;
631
632 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
633
634 if(isVerbose()){
635 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
636 }
637
638 if (U_SUCCESS(*status))
639 {
640 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
641 doesn't survive expect either) */
642
643 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
644
645 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
646
647 if (U_FAILURE(*status))
648 {
649 res_close(result);
650 return NULL;
651 }
652 }
653
654 return result;
655 }
656
657 #if !UCONFIG_NO_COLLATION
658
659 namespace {
660
661 static struct SResource* resLookup(struct SResource* res, const char* key){
662 if (res == res_none() || !res->isTable()) {
663 return NULL;
664 }
665
666 TableResource *list = static_cast<TableResource *>(res);
667 SResource *current = list->fFirst;
668 while (current != NULL) {
669 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
670 return current;
671 }
672 current = current->fNext;
673 }
674 return NULL;
675 }
676
677 class GenrbImporter : public icu::CollationRuleParser::Importer {
678 public:
679 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
680 virtual ~GenrbImporter();
681 virtual void getRules(
682 const char *localeID, const char *collationType,
683 UnicodeString &rules,
684 const char *&errorReason, UErrorCode &errorCode);
685
686 private:
687 const char *inputDir;
688 const char *outputDir;
689 };
690
691 GenrbImporter::~GenrbImporter() {}
692
693 void
694 GenrbImporter::getRules(
695 const char *localeID, const char *collationType,
696 UnicodeString &rules,
697 const char *& /*errorReason*/, UErrorCode &errorCode) {
698 CharString filename(localeID, errorCode);
699 for(int32_t i = 0; i < filename.length(); i++){
700 if(filename[i] == '-'){
701 filename.data()[i] = '_';
702 }
703 }
704 filename.append(".txt", errorCode);
705 if (U_FAILURE(errorCode)) {
706 return;
707 }
708 CharString inputDirBuf;
709 CharString openFileName;
710 if(inputDir == NULL) {
711 const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
712 if (filenameBegin != NULL) {
713 /*
714 * When a filename ../../../data/root.txt is specified,
715 * we presume that the input directory is ../../../data
716 * This is very important when the resource file includes
717 * another file, like UCARules.txt or thaidict.brk.
718 */
719 StringPiece dir = filename.toStringPiece();
720 const char *filenameLimit = filename.data() + filename.length();
721 dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
722 inputDirBuf.append(dir, errorCode);
723 inputDir = inputDirBuf.data();
724 }
725 }else{
726 int32_t dirlen = (int32_t)uprv_strlen(inputDir);
727
728 if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
729 /*
730 * append the input dir to openFileName if the first char in
731 * filename is not file separator char and the last char input directory is not '.'.
732 * This is to support :
733 * genrb -s. /home/icu/data
734 * genrb -s. icu/data
735 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant
737 * user should use
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir
739 */
740 openFileName.append(inputDir, dirlen, errorCode);
741 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
742 openFileName.append(U_FILE_SEP_CHAR, errorCode);
743 }
744 }
745 }
746 openFileName.append(filename, errorCode);
747 if(U_FAILURE(errorCode)) {
748 return;
749 }
750 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
751 const char* cp = "";
752 LocalUCHARBUFPointer ucbuf(
753 ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
754 if(errorCode == U_FILE_ACCESS_ERROR) {
755 fprintf(stderr, "couldn't open file %s\n", openFileName.data());
756 return;
757 }
758 if (ucbuf.isNull() || U_FAILURE(errorCode)) {
759 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
760 return;
761 }
762
763 /* Parse the data into an SRBRoot */
764 struct SRBRoot *data =
765 parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode);
766 if (U_FAILURE(errorCode)) {
767 return;
768 }
769
770 struct SResource *root = data->fRoot;
771 struct SResource *collations = resLookup(root, "collations");
772 if (collations != NULL) {
773 struct SResource *collation = resLookup(collations, collationType);
774 if (collation != NULL) {
775 struct SResource *sequence = resLookup(collation, "Sequence");
776 if (sequence != NULL && sequence->isString()) {
777 // No string pointer aliasing so that we need not hold onto the resource bundle.
778 StringResource *sr = static_cast<StringResource *>(sequence);
779 rules = sr->fString;
780 }
781 }
782 }
783 }
784
785 // Quick-and-dirty escaping function.
786 // Assumes that we are on an ASCII-based platform.
787 static void
788 escape(const UChar *s, char *buffer) {
789 int32_t length = u_strlen(s);
790 int32_t i = 0;
791 for (;;) {
792 UChar32 c;
793 U16_NEXT(s, i, length, c);
794 if (c == 0) {
795 *buffer = 0;
796 return;
797 } else if (0x20 <= c && c <= 0x7e) {
798 // printable ASCII
799 *buffer++ = (char)c; // assumes ASCII-based platform
800 } else {
801 buffer += sprintf(buffer, "\\u%04X", (int)c);
802 }
803 }
804 }
805
806 } // namespace
807
808 #endif // !UCONFIG_NO_COLLATION
809
810 static TableResource *
811 addCollation(ParseState* state, TableResource *result, const char *collationType,
812 uint32_t startline, UErrorCode *status)
813 {
814 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
815 struct SResource *member = NULL;
816 struct UString *tokenValue;
817 struct UString comment;
818 enum ETokenType token;
819 char subtag[1024];
820 UnicodeString rules;
821 UBool haveRules = FALSE;
822 UVersionInfo version;
823 uint32_t line;
824
825 /* '{' . (name resource)* '}' */
826 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
827
828 for (;;)
829 {
830 ustr_init(&comment);
831 token = getToken(state, &tokenValue, &comment, &line, status);
832
833 if (token == TOK_CLOSE_BRACE)
834 {
835 break;
836 }
837
838 if (token != TOK_STRING)
839 {
840 res_close(result);
841 *status = U_INVALID_FORMAT_ERROR;
842
843 if (token == TOK_EOF)
844 {
845 error(startline, "unterminated table");
846 }
847 else
848 {
849 error(line, "Unexpected token %s", tokenNames[token]);
850 }
851
852 return NULL;
853 }
854
855 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
856
857 if (U_FAILURE(*status))
858 {
859 res_close(result);
860 return NULL;
861 }
862
863 member = parseResource(state, subtag, NULL, status);
864
865 if (U_FAILURE(*status))
866 {
867 res_close(result);
868 return NULL;
869 }
870 if (result == NULL)
871 {
872 // Ignore the parsed resources, continue parsing.
873 }
874 else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
875 {
876 StringResource *sr = static_cast<StringResource *>(member);
877 char ver[40];
878 int32_t length = sr->length();
879
880 if (length >= UPRV_LENGTHOF(ver))
881 {
882 length = UPRV_LENGTHOF(ver) - 1;
883 }
884
885 sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
886 u_versionFromString(version, ver);
887
888 result->add(member, line, *status);
889 member = NULL;
890 }
891 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
892 {
893 /* discard duplicate %%CollationBin if any*/
894 }
895 else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
896 {
897 StringResource *sr = static_cast<StringResource *>(member);
898 rules = sr->fString;
899 haveRules = TRUE;
900 // Defer building the collator until we have seen
901 // all sub-elements of the collation table, including the Version.
902 /* in order to achieve smaller data files, we can direct genrb */
903 /* to omit collation rules */
904 if(!state->omitCollationRules) {
905 result->add(member, line, *status);
906 member = NULL;
907 }
908 }
909 else // Just copy non-special items.
910 {
911 result->add(member, line, *status);
912 member = NULL;
913 }
914 res_close(member); // TODO: use LocalPointer
915 if (U_FAILURE(*status))
916 {
917 res_close(result);
918 return NULL;
919 }
920 }
921
922 if (!haveRules) { return result; }
923
924 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
925 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
926 (void)collationType;
927 #else
928 // CLDR ticket #3949, ICU ticket #8082:
929 // Do not build collation binary data for for-import-only "private" collation rule strings.
930 if (uprv_strncmp(collationType, "private-", 8) == 0) {
931 if(isVerbose()) {
932 printf("Not building %s~%s collation binary\n", state->filename, collationType);
933 }
934 return result;
935 }
936
937 if(!state->makeBinaryCollation) {
938 if(isVerbose()) {
939 printf("Not building %s~%s collation binary\n", state->filename, collationType);
940 }
941 return result;
942 }
943 UErrorCode intStatus = U_ZERO_ERROR;
944 UParseError parseError;
945 uprv_memset(&parseError, 0, sizeof(parseError));
946 GenrbImporter importer(state->inputdir, state->outputdir);
947 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
948 if(U_FAILURE(intStatus)) {
949 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
950 res_close(result);
951 return NULL; // TODO: use LocalUResourceBundlePointer for result
952 }
953 icu::CollationBuilder builder(base, intStatus);
954 if(uprv_strncmp(collationType, "search", 6) == 0) {
955 builder.disableFastLatin(); // build fast-Latin table unless search collator
956 }
957 LocalPointer<icu::CollationTailoring> t(
958 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
959 if(U_FAILURE(intStatus)) {
960 const char *reason = builder.getErrorReason();
961 if(reason == NULL) { reason = ""; }
962 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
963 state->filename, collationType,
964 (long)parseError.offset, u_errorName(intStatus), reason);
965 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
966 // Print pre- and post-context.
967 char preBuffer[100], postBuffer[100];
968 escape(parseError.preContext, preBuffer);
969 escape(parseError.postContext, postBuffer);
970 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
971 }
972 if(isStrict() || t.isNull()) {
973 *status = intStatus;
974 res_close(result);
975 return NULL;
976 }
977 }
978 icu::LocalMemory<uint8_t> buffer;
979 int32_t capacity = 100000;
980 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
981 if(dest == NULL) {
982 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
983 (long)capacity);
984 *status = U_MEMORY_ALLOCATION_ERROR;
985 res_close(result);
986 return NULL;
987 }
988 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
989 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
990 *t, *t->settings, indexes, dest, capacity, intStatus);
991 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
992 intStatus = U_ZERO_ERROR;
993 capacity = totalSize;
994 dest = buffer.allocateInsteadAndCopy(capacity);
995 if(dest == NULL) {
996 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
997 (long)capacity);
998 *status = U_MEMORY_ALLOCATION_ERROR;
999 res_close(result);
1000 return NULL;
1001 }
1002 totalSize = icu::CollationDataWriter::writeTailoring(
1003 *t, *t->settings, indexes, dest, capacity, intStatus);
1004 }
1005 if(U_FAILURE(intStatus)) {
1006 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1007 u_errorName(intStatus));
1008 res_close(result);
1009 return NULL;
1010 }
1011 if(isVerbose()) {
1012 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1013 icu::CollationInfo::printSizes(totalSize, indexes);
1014 if(t->settings->hasReordering()) {
1015 printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1016 icu::CollationInfo::printReorderRanges(
1017 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1018 }
1019 }
1020 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1021 result->add(collationBin, line, *status);
1022 if (U_FAILURE(*status)) {
1023 res_close(result);
1024 return NULL;
1025 }
1026 #endif
1027 return result;
1028 }
1029
1030 static UBool
1031 keepCollationType(const char * /*type*/) {
1032 return TRUE;
1033 }
1034
1035 static struct SResource *
1036 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1037 {
1038 TableResource *result = NULL;
1039 struct SResource *member = NULL;
1040 struct UString *tokenValue;
1041 struct UString comment;
1042 enum ETokenType token;
1043 char subtag[1024], typeKeyword[1024];
1044 uint32_t line;
1045
1046 result = table_open(state->bundle, tag, NULL, status);
1047
1048 if (result == NULL || U_FAILURE(*status))
1049 {
1050 return NULL;
1051 }
1052 if(isVerbose()){
1053 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1054 }
1055 if(!newCollation) {
1056 return addCollation(state, result, "(no type)", startline, status);
1057 }
1058 else {
1059 for(;;) {
1060 ustr_init(&comment);
1061 token = getToken(state, &tokenValue, &comment, &line, status);
1062
1063 if (token == TOK_CLOSE_BRACE)
1064 {
1065 return result;
1066 }
1067
1068 if (token != TOK_STRING)
1069 {
1070 res_close(result);
1071 *status = U_INVALID_FORMAT_ERROR;
1072
1073 if (token == TOK_EOF)
1074 {
1075 error(startline, "unterminated table");
1076 }
1077 else
1078 {
1079 error(line, "Unexpected token %s", tokenNames[token]);
1080 }
1081
1082 return NULL;
1083 }
1084
1085 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1086
1087 if (U_FAILURE(*status))
1088 {
1089 res_close(result);
1090 return NULL;
1091 }
1092
1093 if (uprv_strcmp(subtag, "default") == 0)
1094 {
1095 member = parseResource(state, subtag, NULL, status);
1096
1097 if (U_FAILURE(*status))
1098 {
1099 res_close(result);
1100 return NULL;
1101 }
1102
1103 result->add(member, line, *status);
1104 }
1105 else
1106 {
1107 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1108 /* this probably needs to be refactored or recursively use the parser */
1109 /* first we assume that our collation table won't have the explicit type */
1110 /* then, we cannot handle aliases */
1111 if(token == TOK_OPEN_BRACE) {
1112 token = getToken(state, &tokenValue, &comment, &line, status);
1113 TableResource *collationRes;
1114 if (keepCollationType(subtag)) {
1115 collationRes = table_open(state->bundle, subtag, NULL, status);
1116 } else {
1117 collationRes = NULL;
1118 }
1119 // need to parse the collation data regardless
1120 collationRes = addCollation(state, collationRes, subtag, startline, status);
1121 if (collationRes != NULL) {
1122 result->add(collationRes, startline, *status);
1123 }
1124 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1125 /* we could have a table too */
1126 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1127 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1128 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1129 member = parseResource(state, subtag, NULL, status);
1130 if (U_FAILURE(*status))
1131 {
1132 res_close(result);
1133 return NULL;
1134 }
1135
1136 result->add(member, line, *status);
1137 } else {
1138 res_close(result);
1139 *status = U_INVALID_FORMAT_ERROR;
1140 return NULL;
1141 }
1142 } else {
1143 res_close(result);
1144 *status = U_INVALID_FORMAT_ERROR;
1145 return NULL;
1146 }
1147 }
1148
1149 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1150
1151 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1152
1153 if (U_FAILURE(*status))
1154 {
1155 res_close(result);
1156 return NULL;
1157 }
1158 }
1159 }
1160 }
1161
1162 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1163 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1164 static struct SResource *
1165 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
1166 {
1167 struct SResource *member = NULL;
1168 struct UString *tokenValue=NULL;
1169 struct UString comment;
1170 enum ETokenType token;
1171 char subtag[1024];
1172 uint32_t line;
1173 UBool readToken = FALSE;
1174
1175 /* '{' . (name resource)* '}' */
1176
1177 if(isVerbose()){
1178 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1179 }
1180 for (;;)
1181 {
1182 ustr_init(&comment);
1183 token = getToken(state, &tokenValue, &comment, &line, status);
1184
1185 if (token == TOK_CLOSE_BRACE)
1186 {
1187 if (!readToken) {
1188 warning(startline, "Encountered empty table");
1189 }
1190 return table;
1191 }
1192
1193 if (token != TOK_STRING)
1194 {
1195 *status = U_INVALID_FORMAT_ERROR;
1196
1197 if (token == TOK_EOF)
1198 {
1199 error(startline, "unterminated table");
1200 }
1201 else
1202 {
1203 error(line, "unexpected token %s", tokenNames[token]);
1204 }
1205
1206 return NULL;
1207 }
1208
1209 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1210 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1211 } else {
1212 *status = U_INVALID_FORMAT_ERROR;
1213 error(line, "invariant characters required for table keys");
1214 return NULL;
1215 }
1216
1217 if (U_FAILURE(*status))
1218 {
1219 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1220 return NULL;
1221 }
1222
1223 member = parseResource(state, subtag, &comment, status);
1224
1225 if (member == NULL || U_FAILURE(*status))
1226 {
1227 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1228 return NULL;
1229 }
1230
1231 table->add(member, line, *status);
1232
1233 if (U_FAILURE(*status))
1234 {
1235 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1236 return NULL;
1237 }
1238 readToken = TRUE;
1239 ustr_deinit(&comment);
1240 }
1241
1242 /* not reached */
1243 /* A compiler warning will appear if all paths don't contain a return statement. */
1244 /* *status = U_INTERNAL_PROGRAM_ERROR;
1245 return NULL;*/
1246 }
1247
1248 static struct SResource *
1249 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1250 {
1251 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1252 {
1253 return parseCollationElements(state, tag, startline, FALSE, status);
1254 }
1255 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1256 {
1257 return parseCollationElements(state, tag, startline, TRUE, status);
1258 }
1259 if(isVerbose()){
1260 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1261 }
1262
1263 TableResource *result = table_open(state->bundle, tag, comment, status);
1264
1265 if (result == NULL || U_FAILURE(*status))
1266 {
1267 return NULL;
1268 }
1269 return realParseTable(state, result, tag, startline, status);
1270 }
1271
1272 static struct SResource *
1273 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1274 {
1275 struct SResource *member = NULL;
1276 struct UString *tokenValue;
1277 struct UString memberComments;
1278 enum ETokenType token;
1279 UBool readToken = FALSE;
1280
1281 ArrayResource *result = array_open(state->bundle, tag, comment, status);
1282
1283 if (result == NULL || U_FAILURE(*status))
1284 {
1285 return NULL;
1286 }
1287 if(isVerbose()){
1288 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1289 }
1290
1291 ustr_init(&memberComments);
1292
1293 /* '{' . resource [','] '}' */
1294 for (;;)
1295 {
1296 /* reset length */
1297 ustr_setlen(&memberComments, 0, status);
1298
1299 /* check for end of array, but don't consume next token unless it really is the end */
1300 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1301
1302
1303 if (token == TOK_CLOSE_BRACE)
1304 {
1305 getToken(state, NULL, NULL, NULL, status);
1306 if (!readToken) {
1307 warning(startline, "Encountered empty array");
1308 }
1309 break;
1310 }
1311
1312 if (token == TOK_EOF)
1313 {
1314 res_close(result);
1315 *status = U_INVALID_FORMAT_ERROR;
1316 error(startline, "unterminated array");
1317 return NULL;
1318 }
1319
1320 /* string arrays are a special case */
1321 if (token == TOK_STRING)
1322 {
1323 getToken(state, &tokenValue, &memberComments, NULL, status);
1324 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1325 }
1326 else
1327 {
1328 member = parseResource(state, NULL, &memberComments, status);
1329 }
1330
1331 if (member == NULL || U_FAILURE(*status))
1332 {
1333 res_close(result);
1334 return NULL;
1335 }
1336
1337 result->add(member);
1338
1339 /* eat optional comma if present */
1340 token = peekToken(state, 0, NULL, NULL, NULL, status);
1341
1342 if (token == TOK_COMMA)
1343 {
1344 getToken(state, NULL, NULL, NULL, status);
1345 }
1346
1347 if (U_FAILURE(*status))
1348 {
1349 res_close(result);
1350 return NULL;
1351 }
1352 readToken = TRUE;
1353 }
1354
1355 ustr_deinit(&memberComments);
1356 return result;
1357 }
1358
1359 static struct SResource *
1360 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1361 {
1362 enum ETokenType token;
1363 char *string;
1364 int32_t value;
1365 UBool readToken = FALSE;
1366 char *stopstring;
1367 uint32_t len;
1368 struct UString memberComments;
1369
1370 IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
1371
1372 if (result == NULL || U_FAILURE(*status))
1373 {
1374 return NULL;
1375 }
1376
1377 if(isVerbose()){
1378 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1379 }
1380 ustr_init(&memberComments);
1381 /* '{' . string [','] '}' */
1382 for (;;)
1383 {
1384 ustr_setlen(&memberComments, 0, status);
1385
1386 /* check for end of array, but don't consume next token unless it really is the end */
1387 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1388
1389 if (token == TOK_CLOSE_BRACE)
1390 {
1391 /* it's the end, consume the close brace */
1392 getToken(state, NULL, NULL, NULL, status);
1393 if (!readToken) {
1394 warning(startline, "Encountered empty int vector");
1395 }
1396 ustr_deinit(&memberComments);
1397 return result;
1398 }
1399
1400 string = getInvariantString(state, NULL, NULL, status);
1401
1402 if (U_FAILURE(*status))
1403 {
1404 res_close(result);
1405 return NULL;
1406 }
1407
1408 /* For handling illegal char in the Intvector */
1409 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1410 len=(uint32_t)(stopstring-string);
1411
1412 if(len==uprv_strlen(string))
1413 {
1414 result->add(value, *status);
1415 uprv_free(string);
1416 token = peekToken(state, 0, NULL, NULL, NULL, status);
1417 }
1418 else
1419 {
1420 uprv_free(string);
1421 *status=U_INVALID_CHAR_FOUND;
1422 }
1423
1424 if (U_FAILURE(*status))
1425 {
1426 res_close(result);
1427 return NULL;
1428 }
1429
1430 /* the comma is optional (even though it is required to prevent the reader from concatenating
1431 consecutive entries) so that a missing comma on the last entry isn't an error */
1432 if (token == TOK_COMMA)
1433 {
1434 getToken(state, NULL, NULL, NULL, status);
1435 }
1436 readToken = TRUE;
1437 }
1438
1439 /* not reached */
1440 /* A compiler warning will appear if all paths don't contain a return statement. */
1441 /* intvector_close(result, status);
1442 *status = U_INTERNAL_PROGRAM_ERROR;
1443 return NULL;*/
1444 }
1445
1446 static struct SResource *
1447 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1448 {
1449 uint32_t line;
1450 LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
1451 if (string.isNull() || U_FAILURE(*status))
1452 {
1453 return NULL;
1454 }
1455
1456 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1457 if (U_FAILURE(*status))
1458 {
1459 return NULL;
1460 }
1461
1462 if(isVerbose()){
1463 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1464 }
1465
1466 uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
1467 if (count > 0){
1468 if((count % 2)==0){
1469 LocalMemory<uint8_t> value;
1470 if (value.allocateInsteadAndCopy(count) == NULL)
1471 {
1472 *status = U_MEMORY_ALLOCATION_ERROR;
1473 return NULL;
1474 }
1475
1476 char toConv[3] = {'\0', '\0', '\0'};
1477 for (uint32_t i = 0; i < count; i += 2)
1478 {
1479 toConv[0] = string[i];
1480 toConv[1] = string[i + 1];
1481
1482 char *stopstring;
1483 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1484 uint32_t len=(uint32_t)(stopstring-toConv);
1485
1486 if(len!=2)
1487 {
1488 *status=U_INVALID_CHAR_FOUND;
1489 return NULL;
1490 }
1491 }
1492
1493 return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
1494 }
1495 else
1496 {
1497 *status = U_INVALID_CHAR_FOUND;
1498 error(line, "Encountered invalid binary value (length is odd)");
1499 return NULL;
1500 }
1501 }
1502 else
1503 {
1504 warning(startline, "Encountered empty binary value");
1505 return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
1506 }
1507 }
1508
1509 static struct SResource *
1510 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1511 {
1512 struct SResource *result = NULL;
1513 int32_t value;
1514 char *string;
1515 char *stopstring;
1516 uint32_t len;
1517
1518 string = getInvariantString(state, NULL, NULL, status);
1519
1520 if (string == NULL || U_FAILURE(*status))
1521 {
1522 return NULL;
1523 }
1524
1525 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1526
1527 if (U_FAILURE(*status))
1528 {
1529 uprv_free(string);
1530 return NULL;
1531 }
1532
1533 if(isVerbose()){
1534 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1535 }
1536
1537 if (uprv_strlen(string) <= 0)
1538 {
1539 warning(startline, "Encountered empty integer. Default value is 0.");
1540 }
1541
1542 /* Allow integer support for hexdecimal, octal digit and decimal*/
1543 /* and handle illegal char in the integer*/
1544 value = uprv_strtoul(string, &stopstring, 0);
1545 len=(uint32_t)(stopstring-string);
1546 if(len==uprv_strlen(string))
1547 {
1548 result = int_open(state->bundle, tag, value, comment, status);
1549 }
1550 else
1551 {
1552 *status=U_INVALID_CHAR_FOUND;
1553 }
1554 uprv_free(string);
1555
1556 return result;
1557 }
1558
1559 static struct SResource *
1560 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1561 {
1562 uint32_t line;
1563 LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
1564 if (U_FAILURE(*status))
1565 {
1566 return NULL;
1567 }
1568
1569 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1570
1571 if (U_FAILURE(*status))
1572 {
1573 return NULL;
1574 }
1575
1576 if(isVerbose()){
1577 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1578 }
1579
1580 /* Open the input file for reading */
1581 CharString fullname;
1582 if (state->inputdir != NULL) {
1583 fullname.append(state->inputdir, *status);
1584 }
1585 fullname.appendPathPart(filename.getAlias(), *status);
1586 if (U_FAILURE(*status)) {
1587 return NULL;
1588 }
1589
1590 FileStream *file = T_FileStream_open(fullname.data(), "rb");
1591 if (file == NULL)
1592 {
1593 error(line, "couldn't open input file %s", filename.getAlias());
1594 *status = U_FILE_ACCESS_ERROR;
1595 return NULL;
1596 }
1597
1598 int32_t len = T_FileStream_size(file);
1599 LocalMemory<uint8_t> data;
1600 if(data.allocateInsteadAndCopy(len) == NULL)
1601 {
1602 *status = U_MEMORY_ALLOCATION_ERROR;
1603 T_FileStream_close (file);
1604 return NULL;
1605 }
1606
1607 /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
1608 T_FileStream_close (file);
1609
1610 return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
1611 }
1612
1613 static struct SResource *
1614 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1615 {
1616 struct SResource *result;
1617 int32_t len=0;
1618 char *filename;
1619 uint32_t line;
1620 UChar *pTarget = NULL;
1621
1622 UCHARBUF *ucbuf;
1623 char *fullname = NULL;
1624 int32_t count = 0;
1625 const char* cp = NULL;
1626 const UChar* uBuffer = NULL;
1627
1628 filename = getInvariantString(state, &line, NULL, status);
1629 count = (int32_t)uprv_strlen(filename);
1630
1631 if (U_FAILURE(*status))
1632 {
1633 return NULL;
1634 }
1635
1636 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1637
1638 if (U_FAILURE(*status))
1639 {
1640 uprv_free(filename);
1641 return NULL;
1642 }
1643
1644 if(isVerbose()){
1645 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1646 }
1647
1648 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1649 /* test for NULL */
1650 if(fullname == NULL)
1651 {
1652 *status = U_MEMORY_ALLOCATION_ERROR;
1653 uprv_free(filename);
1654 return NULL;
1655 }
1656
1657 if(state->inputdir!=NULL){
1658 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1659 {
1660
1661 uprv_strcpy(fullname, state->inputdir);
1662
1663 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1664 fullname[state->inputdirLength + 1] = '\0';
1665
1666 uprv_strcat(fullname, filename);
1667 }
1668 else
1669 {
1670 uprv_strcpy(fullname, state->inputdir);
1671 uprv_strcat(fullname, filename);
1672 }
1673 }else{
1674 uprv_strcpy(fullname,filename);
1675 }
1676
1677 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1678
1679 if (U_FAILURE(*status)) {
1680 error(line, "couldn't open input file %s\n", filename);
1681 return NULL;
1682 }
1683
1684 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1685 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1686
1687 ucbuf_close(ucbuf);
1688
1689 uprv_free(pTarget);
1690
1691 uprv_free(filename);
1692 uprv_free(fullname);
1693
1694 return result;
1695 }
1696
1697
1698
1699
1700
1701 U_STRING_DECL(k_type_string, "string", 6);
1702 U_STRING_DECL(k_type_binary, "binary", 6);
1703 U_STRING_DECL(k_type_bin, "bin", 3);
1704 U_STRING_DECL(k_type_table, "table", 5);
1705 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1706 U_STRING_DECL(k_type_int, "int", 3);
1707 U_STRING_DECL(k_type_integer, "integer", 7);
1708 U_STRING_DECL(k_type_array, "array", 5);
1709 U_STRING_DECL(k_type_alias, "alias", 5);
1710 U_STRING_DECL(k_type_intvector, "intvector", 9);
1711 U_STRING_DECL(k_type_import, "import", 6);
1712 U_STRING_DECL(k_type_include, "include", 7);
1713
1714 /* Various non-standard processing plugins that create one or more special resources. */
1715 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1716 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1717 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1718 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1719
/*
 * Resource types known to the parser. The values are used as indexes into
 * gResourceTypes[], so the order here must match the order of that table.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN = 0,            /* no explicit type given (or none recognised yet) */

    /* standard resource types */
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,

    /* processing plugins */
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,

    RESTYPE_RESERVED                /* end marker; not a real type */
} EResourceType;
1739
1740 static struct {
1741 const char *nameChars; /* only used for debugging */
1742 const UChar *nameUChars;
1743 ParseResourceFunction *parseFunction;
1744 } gResourceTypes[] = {
1745 {"Unknown", NULL, NULL},
1746 {"string", k_type_string, parseString},
1747 {"binary", k_type_binary, parseBinary},
1748 {"table", k_type_table, parseTable},
1749 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1750 {"integer", k_type_integer, parseInteger},
1751 {"array", k_type_array, parseArray},
1752 {"alias", k_type_alias, parseAlias},
1753 {"intvector", k_type_intvector, parseIntVector},
1754 {"import", k_type_import, parseImport},
1755 {"include", k_type_include, parseInclude},
1756 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1757 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1758 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1759 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1760 {"reserved", NULL, NULL}
1761 };
1762
1763 void initParser()
1764 {
1765 U_STRING_INIT(k_type_string, "string", 6);
1766 U_STRING_INIT(k_type_binary, "binary", 6);
1767 U_STRING_INIT(k_type_bin, "bin", 3);
1768 U_STRING_INIT(k_type_table, "table", 5);
1769 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1770 U_STRING_INIT(k_type_int, "int", 3);
1771 U_STRING_INIT(k_type_integer, "integer", 7);
1772 U_STRING_INIT(k_type_array, "array", 5);
1773 U_STRING_INIT(k_type_alias, "alias", 5);
1774 U_STRING_INIT(k_type_intvector, "intvector", 9);
1775 U_STRING_INIT(k_type_import, "import", 6);
1776 U_STRING_INIT(k_type_include, "include", 7);
1777
1778 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1779 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1780 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1781 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1782 }
1783
1784 static inline UBool isTable(enum EResourceType type) {
1785 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1786 }
1787
1788 static enum EResourceType
1789 parseResourceType(ParseState* state, UErrorCode *status)
1790 {
1791 struct UString *tokenValue;
1792 struct UString comment;
1793 enum EResourceType result = RESTYPE_UNKNOWN;
1794 uint32_t line=0;
1795 ustr_init(&comment);
1796 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1797
1798 if (U_FAILURE(*status))
1799 {
1800 return RESTYPE_UNKNOWN;
1801 }
1802
1803 *status = U_ZERO_ERROR;
1804
1805 /* Search for normal types */
1806 result=RESTYPE_UNKNOWN;
1807 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1808 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1809 break;
1810 }
1811 }
1812 /* Now search for the aliases */
1813 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1814 result = RESTYPE_INTEGER;
1815 }
1816 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1817 result = RESTYPE_BINARY;
1818 }
1819 else if (result == RESTYPE_RESERVED) {
1820 char tokenBuffer[1024];
1821 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1822 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1823 *status = U_INVALID_FORMAT_ERROR;
1824 error(line, "unknown resource type '%s'", tokenBuffer);
1825 }
1826
1827 return result;
1828 }
1829
1830 /* parse a non-top-level resource */
1831 static struct SResource *
1832 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1833 {
1834 enum ETokenType token;
1835 enum EResourceType resType = RESTYPE_UNKNOWN;
1836 ParseResourceFunction *parseFunction = NULL;
1837 struct UString *tokenValue;
1838 uint32_t startline;
1839 uint32_t line;
1840
1841
1842 token = getToken(state, &tokenValue, NULL, &startline, status);
1843
1844 if(isVerbose()){
1845 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1846 }
1847
1848 /* name . [ ':' type ] '{' resource '}' */
1849 /* This function parses from the colon onwards. If the colon is present, parse the
1850 type then try to parse a resource of that type. If there is no explicit type,
1851 work it out using the lookahead tokens. */
1852 switch (token)
1853 {
1854 case TOK_EOF:
1855 *status = U_INVALID_FORMAT_ERROR;
1856 error(startline, "Unexpected EOF encountered");
1857 return NULL;
1858
1859 case TOK_ERROR:
1860 *status = U_INVALID_FORMAT_ERROR;
1861 return NULL;
1862
1863 case TOK_COLON:
1864 resType = parseResourceType(state, status);
1865 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1866
1867 if (U_FAILURE(*status))
1868 {
1869 return NULL;
1870 }
1871
1872 break;
1873
1874 case TOK_OPEN_BRACE:
1875 break;
1876
1877 default:
1878 *status = U_INVALID_FORMAT_ERROR;
1879 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1880 return NULL;
1881 }
1882
1883
1884 if (resType == RESTYPE_UNKNOWN)
1885 {
1886 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1887 We could have any of the following:
1888 { { => array (nested)
1889 { :/} => array
1890 { string , => string array
1891
1892 { string { => table
1893
1894 { string :/{ => table
1895 { string } => string
1896 */
1897
1898 token = peekToken(state, 0, NULL, &line, NULL,status);
1899
1900 if (U_FAILURE(*status))
1901 {
1902 return NULL;
1903 }
1904
1905 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1906 {
1907 resType = RESTYPE_ARRAY;
1908 }
1909 else if (token == TOK_STRING)
1910 {
1911 token = peekToken(state, 1, NULL, &line, NULL, status);
1912
1913 if (U_FAILURE(*status))
1914 {
1915 return NULL;
1916 }
1917
1918 switch (token)
1919 {
1920 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
1921 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
1922 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
1923 case TOK_COLON: resType = RESTYPE_TABLE; break;
1924 default:
1925 *status = U_INVALID_FORMAT_ERROR;
1926 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1927 return NULL;
1928 }
1929 }
1930 else
1931 {
1932 *status = U_INVALID_FORMAT_ERROR;
1933 error(line, "Unexpected token after '{'");
1934 return NULL;
1935 }
1936
1937 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1938 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
1939 *status = U_INVALID_FORMAT_ERROR;
1940 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1941 return NULL;
1942 }
1943
1944
1945 /* We should now know what we need to parse next, so call the appropriate parser
1946 function and return. */
1947 parseFunction = gResourceTypes[resType].parseFunction;
1948 if (parseFunction != NULL) {
1949 return parseFunction(state, tag, startline, comment, status);
1950 }
1951 else {
1952 *status = U_INTERNAL_PROGRAM_ERROR;
1953 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1954 }
1955
1956 return NULL;
1957 }
1958
1959 /* parse the top-level resource */
1960 struct SRBRoot *
1961 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
1962 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
1963 {
1964 struct UString *tokenValue;
1965 struct UString comment;
1966 uint32_t line;
1967 enum EResourceType bundleType;
1968 enum ETokenType token;
1969 ParseState state;
1970 uint32_t i;
1971
1972
1973 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1974 {
1975 ustr_init(&state.lookahead[i].value);
1976 ustr_init(&state.lookahead[i].comment);
1977 }
1978
1979 initLookahead(&state, buf, status);
1980
1981 state.inputdir = inputDir;
1982 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
1983 state.outputdir = outputDir;
1984 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
1985 state.filename = filename;
1986 state.makeBinaryCollation = makeBinaryCollation;
1987 state.omitCollationRules = omitCollationRules;
1988
1989 ustr_init(&comment);
1990 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
1991
1992 state.bundle = new SRBRoot(&comment, FALSE, *status);
1993
1994 if (state.bundle == NULL || U_FAILURE(*status))
1995 {
1996 return NULL;
1997 }
1998
1999
2000 state.bundle->setLocale(tokenValue->fChars, *status);
2001
2002 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2003 token = getToken(&state, NULL, NULL, &line, status);
2004 if(token==TOK_COLON) {
2005 *status=U_ZERO_ERROR;
2006 bundleType=parseResourceType(&state, status);
2007
2008 if(isTable(bundleType))
2009 {
2010 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2011 }
2012 else
2013 {
2014 *status=U_PARSE_ERROR;
2015 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2016 }
2017 }
2018 else
2019 {
2020 /* not a colon */
2021 if(token==TOK_OPEN_BRACE)
2022 {
2023 *status=U_ZERO_ERROR;
2024 bundleType=RESTYPE_TABLE;
2025 }
2026 else
2027 {
2028 /* neither colon nor open brace */
2029 *status=U_PARSE_ERROR;
2030 bundleType=RESTYPE_UNKNOWN;
2031 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2032 }
2033 }
2034
2035 if (U_FAILURE(*status))
2036 {
2037 delete state.bundle;
2038 return NULL;
2039 }
2040
2041 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2042 /*
2043 * Parse a top-level table with the table(nofallback) declaration.
2044 * This is the same as a regular table, but also sets the
2045 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2046 */
2047 state.bundle->fNoFallback=TRUE;
2048 }
2049 /* top-level tables need not handle special table names like "collations" */
2050 assert(!state.bundle->fIsPoolBundle);
2051 assert(state.bundle->fRoot->fType == URES_TABLE);
2052 TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
2053 realParseTable(&state, rootTable, NULL, line, status);
2054 if(dependencyArray!=NULL){
2055 rootTable->add(dependencyArray, 0, *status);
2056 dependencyArray = NULL;
2057 }
2058 if (U_FAILURE(*status))
2059 {
2060 delete state.bundle;
2061 res_close(dependencyArray);
2062 return NULL;
2063 }
2064
2065 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2066 {
2067 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2068 if(isStrict()){
2069 *status = U_INVALID_FORMAT_ERROR;
2070 return NULL;
2071 }
2072 }
2073
2074 cleanupLookahead(&state);
2075 ustr_deinit(&comment);
2076 return state.bundle;
2077 }