]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/parse.cpp
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1998-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File parse.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 05/26/99 stephen Creation.
17 * 02/25/00 weiv Overhaul to write udata
18 * 5/10/01 Ram removed ustdio dependency
19 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
20 *******************************************************************************
21 */
22
23 // Safer use of UnicodeString.
24 #ifndef UNISTR_FROM_CHAR_EXPLICIT
25 # define UNISTR_FROM_CHAR_EXPLICIT explicit
26 #endif
27
28 // Less important, but still a good idea.
29 #ifndef UNISTR_FROM_STRING_EXPLICIT
30 # define UNISTR_FROM_STRING_EXPLICIT explicit
31 #endif
32
33 #include <assert.h>
34 #include "parse.h"
35 #include "errmsg.h"
36 #include "uhash.h"
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "uinvchar.h"
40 #include "read.h"
41 #include "ustr.h"
42 #include "reslist.h"
43 #include "rbt_pars.h"
44 #include "genrb.h"
45 #include "unicode/stringpiece.h"
46 #include "unicode/unistr.h"
47 #include "unicode/ustring.h"
48 #include "unicode/uscript.h"
49 #include "unicode/utf16.h"
50 #include "unicode/putil.h"
51 #include "charstr.h"
52 #include "collationbuilder.h"
53 #include "collationdata.h"
54 #include "collationdatareader.h"
55 #include "collationdatawriter.h"
56 #include "collationfastlatinbuilder.h"
57 #include "collationinfo.h"
58 #include "collationroot.h"
59 #include "collationruleparser.h"
60 #include "collationtailoring.h"
61 #include <stdio.h>
62
63 /* Number of tokens to read ahead of the current stream position */
64 #define MAX_LOOKAHEAD 3
65
66 #define CR 0x000D
67 #define LF 0x000A
68 #define SPACE 0x0020
69 #define TAB 0x0009
70 #define ESCAPE 0x005C
71 #define HASH 0x0023
72 #define QUOTE 0x0027
73 #define ZERO 0x0030
74 #define STARTCOMMAND 0x005B
75 #define ENDCOMMAND 0x005D
76 #define OPENSQBRACKET 0x005B
77 #define CLOSESQBRACKET 0x005D
78
79 using icu::CharString;
80 using icu::LocalMemory;
81 using icu::LocalPointer;
82 using icu::LocalUCHARBUFPointer;
83 using icu::StringPiece;
84 using icu::UnicodeString;
85
/* One slot of the parser's token lookahead ring (see ParseState::lookahead). */
struct Lookahead
{
    enum ETokenType type;     /* token kind, as returned by getNextToken() */
    struct UString value;     /* token text, filled in by getNextToken() */
    struct UString comment;   /* comment text delivered by getNextToken() together with the token */
    uint32_t line;            /* line number reported by getNextToken() for this token */
};
93
/*
 * Printable names of the token types, indexed by enum ETokenType.
 * Used by the parser when formatting "expecting X, got Y" style diagnostics.
 * keep in sync with token defines in read.h
 */
const char *tokenNames[TOK_TOKEN_COUNT] =
{
    "string", /* A string token, such as "MonthNames" */
    "'{'", /* An opening brace character */
    "'}'", /* A closing brace character */
    "','", /* A comma */
    "':'", /* A colon */

    "<end of file>", /* End of the file has been reached successfully */
    "<end of line>"
};
106
107 /* Just to store "TRUE" */
108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
109
/* All state shared by the parse functions for one input file. */
typedef struct {
    struct Lookahead lookahead[MAX_LOOKAHEAD + 1];  /* circular token buffer: current token + lookahead */
    uint32_t lookaheadPosition;                     /* index of the current token in lookahead[] */
    UCHARBUF *buffer;                               /* input that getNextToken() reads from */
    struct SRBRoot *bundle;                         /* bundle handed to the *_open() resource constructors */
    const char *inputdir;                           /* directory prefix for included files; may be NULL */
    uint32_t inputdirLength;                        /* used to index the last character of inputdir */
    const char *outputdir;                          /* directory prefix for dependency files; may be NULL */
    uint32_t outputdirLength;                       /* used to index the last character of outputdir */
    const char *filename;                           /* name printed in collation diagnostics */
    UBool makeBinaryCollation;                      /* when FALSE, addCollation() skips building %%CollationBin */
    UBool omitCollationRules;                       /* when TRUE, rule strings are left out of the output */
} ParseState;
123
/*
 * Common signature of the type-specific parse functions (parseString,
 * parseAlias, parseUCARules, ...): they receive the resource tag, the line
 * the resource started on and the preceding comment, and return the new
 * resource or NULL.
 */
typedef struct SResource *
ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);

/* Forward declaration: the collation parsers below call parseResource() for nested resources. */
static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
128
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
142 static void
143 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
144 {
145 static uint32_t initTypeStrings = 0;
146 uint32_t i;
147
148 if (!initTypeStrings)
149 {
150 initTypeStrings = 1;
151 }
152
153 state->lookaheadPosition = 0;
154 state->buffer = buf;
155
156 resetLineNumber();
157
158 for (i = 0; i < MAX_LOOKAHEAD; i++)
159 {
160 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
161 if (U_FAILURE(*status))
162 {
163 return;
164 }
165 }
166
167 *status = U_ZERO_ERROR;
168 }
169
170 static void
171 cleanupLookahead(ParseState* state)
172 {
173 uint32_t i;
174 for (i = 0; i <= MAX_LOOKAHEAD; i++)
175 {
176 ustr_deinit(&state->lookahead[i].value);
177 ustr_deinit(&state->lookahead[i].comment);
178 }
179
180 }
181
182 static enum ETokenType
183 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
184 {
185 enum ETokenType result;
186 uint32_t i;
187
188 result = state->lookahead[state->lookaheadPosition].type;
189
190 if (tokenValue != NULL)
191 {
192 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
193 }
194
195 if (linenumber != NULL)
196 {
197 *linenumber = state->lookahead[state->lookaheadPosition].line;
198 }
199
200 if (comment != NULL)
201 {
202 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
203 }
204
205 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
206 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
207 ustr_setlen(&state->lookahead[i].comment, 0, status);
208 ustr_setlen(&state->lookahead[i].value, 0, status);
209 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
210
211 /* printf("getToken, returning %s\n", tokenNames[result]); */
212
213 return result;
214 }
215
216 static enum ETokenType
217 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
218 {
219 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
220
221 if (U_FAILURE(*status))
222 {
223 return TOK_ERROR;
224 }
225
226 if (lookaheadCount >= MAX_LOOKAHEAD)
227 {
228 *status = U_INTERNAL_PROGRAM_ERROR;
229 return TOK_ERROR;
230 }
231
232 if (tokenValue != NULL)
233 {
234 *tokenValue = &state->lookahead[i].value;
235 }
236
237 if (linenumber != NULL)
238 {
239 *linenumber = state->lookahead[i].line;
240 }
241
242 if(comment != NULL){
243 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
244 }
245
246 return state->lookahead[i].type;
247 }
248
249 static void
250 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
251 {
252 uint32_t line;
253
254 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
255
256 if (linenumber != NULL)
257 {
258 *linenumber = line;
259 }
260
261 if (U_FAILURE(*status))
262 {
263 return;
264 }
265
266 if (token != expectedToken)
267 {
268 *status = U_INVALID_FORMAT_ERROR;
269 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
270 }
271 else
272 {
273 *status = U_ZERO_ERROR;
274 }
275 }
276
277 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
278 {
279 struct UString *tokenValue;
280 char *result;
281 uint32_t count;
282
283 expect(state, TOK_STRING, &tokenValue, comment, line, status);
284
285 if (U_FAILURE(*status))
286 {
287 return NULL;
288 }
289
290 count = u_strlen(tokenValue->fChars);
291 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
292 *status = U_INVALID_FORMAT_ERROR;
293 error(*line, "invariant characters required for table keys, binary data, etc.");
294 return NULL;
295 }
296
297 result = static_cast<char *>(uprv_malloc(count+1));
298
299 if (result == NULL)
300 {
301 *status = U_MEMORY_ALLOCATION_ERROR;
302 return NULL;
303 }
304
305 u_UCharsToChars(tokenValue->fChars, result, count+1);
306 return result;
307 }
308
309 static struct SResource *
310 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
311 {
312 struct SResource *result = NULL;
313 struct UString *tokenValue;
314 FileStream *file = NULL;
315 char filename[256] = { '\0' };
316 char cs[128] = { '\0' };
317 uint32_t line;
318 UBool quoted = FALSE;
319 UCHARBUF *ucbuf=NULL;
320 UChar32 c = 0;
321 const char* cp = NULL;
322 UChar *pTarget = NULL;
323 UChar *target = NULL;
324 UChar *targetLimit = NULL;
325 int32_t size = 0;
326
327 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
328
329 if(isVerbose()){
330 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
331 }
332
333 if (U_FAILURE(*status))
334 {
335 return NULL;
336 }
337 /* make the filename including the directory */
338 if (state->inputdir != NULL)
339 {
340 uprv_strcat(filename, state->inputdir);
341
342 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
343 {
344 uprv_strcat(filename, U_FILE_SEP_STRING);
345 }
346 }
347
348 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
349
350 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
351
352 if (U_FAILURE(*status))
353 {
354 return NULL;
355 }
356 uprv_strcat(filename, cs);
357
358 if(state->omitCollationRules) {
359 return res_none();
360 }
361
362 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
363
364 if (U_FAILURE(*status)) {
365 error(line, "An error occurred while opening the input file %s\n", filename);
366 return NULL;
367 }
368
369 /* We allocate more space than actually required
370 * since the actual size needed for storing UChars
371 * is not known in UTF-8 byte stream
372 */
373 size = ucbuf_size(ucbuf) + 1;
374 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
375 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
376 target = pTarget;
377 targetLimit = pTarget+size;
378
379 /* read the rules into the buffer */
380 while (target < targetLimit)
381 {
382 c = ucbuf_getc(ucbuf, status);
383 if(c == QUOTE) {
384 quoted = (UBool)!quoted;
385 }
386 /* weiv (06/26/2002): adding the following:
387 * - preserving spaces in commands [...]
388 * - # comments until the end of line
389 */
390 if (c == STARTCOMMAND && !quoted)
391 {
392 /* preserve commands
393 * closing bracket will be handled by the
394 * append at the end of the loop
395 */
396 while(c != ENDCOMMAND) {
397 U_APPEND_CHAR32_ONLY(c, target);
398 c = ucbuf_getc(ucbuf, status);
399 }
400 }
401 else if (c == HASH && !quoted) {
402 /* skip comments */
403 while(c != CR && c != LF) {
404 c = ucbuf_getc(ucbuf, status);
405 }
406 continue;
407 }
408 else if (c == ESCAPE)
409 {
410 c = unescape(ucbuf, status);
411
412 if (c == (UChar32)U_ERR)
413 {
414 uprv_free(pTarget);
415 T_FileStream_close(file);
416 return NULL;
417 }
418 }
419 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
420 {
421 /* ignore spaces carriage returns
422 * and line feed unless in the form \uXXXX
423 */
424 continue;
425 }
426
427 /* Append UChar * after dissembling if c > 0xffff*/
428 if (c != (UChar32)U_EOF)
429 {
430 U_APPEND_CHAR32_ONLY(c, target);
431 }
432 else
433 {
434 break;
435 }
436 }
437
438 /* terminate the string */
439 if(target < targetLimit){
440 *target = 0x0000;
441 }
442
443 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
444
445
446 ucbuf_close(ucbuf);
447 uprv_free(pTarget);
448 T_FileStream_close(file);
449
450 return result;
451 }
452
453 static struct SResource *
454 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
455 {
456 struct SResource *result = NULL;
457 struct UString *tokenValue;
458 FileStream *file = NULL;
459 char filename[256] = { '\0' };
460 char cs[128] = { '\0' };
461 uint32_t line;
462 UCHARBUF *ucbuf=NULL;
463 const char* cp = NULL;
464 UChar *pTarget = NULL;
465 const UChar *pSource = NULL;
466 int32_t size = 0;
467
468 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
469
470 if(isVerbose()){
471 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
472 }
473
474 if (U_FAILURE(*status))
475 {
476 return NULL;
477 }
478 /* make the filename including the directory */
479 if (state->inputdir != NULL)
480 {
481 uprv_strcat(filename, state->inputdir);
482
483 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
484 {
485 uprv_strcat(filename, U_FILE_SEP_STRING);
486 }
487 }
488
489 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
490
491 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
492
493 if (U_FAILURE(*status))
494 {
495 return NULL;
496 }
497 uprv_strcat(filename, cs);
498
499
500 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
501
502 if (U_FAILURE(*status)) {
503 error(line, "An error occurred while opening the input file %s\n", filename);
504 return NULL;
505 }
506
507 /* We allocate more space than actually required
508 * since the actual size needed for storing UChars
509 * is not known in UTF-8 byte stream
510 */
511 pSource = ucbuf_getBuffer(ucbuf, &size, status);
512 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
513 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
514
515 #if !UCONFIG_NO_TRANSLITERATION
516 size = utrans_stripRules(pSource, size, pTarget, status);
517 #else
518 size = 0;
519 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
520 #endif
521 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
522
523 ucbuf_close(ucbuf);
524 uprv_free(pTarget);
525 T_FileStream_close(file);
526
527 return result;
528 }
529 static ArrayResource* dependencyArray = NULL;
530
531 static struct SResource *
532 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
533 {
534 struct SResource *result = NULL;
535 struct SResource *elem = NULL;
536 struct UString *tokenValue;
537 uint32_t line;
538 char filename[256] = { '\0' };
539 char cs[128] = { '\0' };
540
541 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
542
543 if(isVerbose()){
544 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
545 }
546
547 if (U_FAILURE(*status))
548 {
549 return NULL;
550 }
551 /* make the filename including the directory */
552 if (state->outputdir != NULL)
553 {
554 uprv_strcat(filename, state->outputdir);
555
556 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
557 {
558 uprv_strcat(filename, U_FILE_SEP_STRING);
559 }
560 }
561
562 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
563
564 if (U_FAILURE(*status))
565 {
566 return NULL;
567 }
568 uprv_strcat(filename, cs);
569 if(!T_FileStream_file_exists(filename)){
570 if(isStrict()){
571 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
572 }else{
573 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
574 }
575 }
576 if(dependencyArray==NULL){
577 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
578 }
579 if(tag!=NULL){
580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
581 }
582 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
583
584 dependencyArray->add(elem);
585
586 if (U_FAILURE(*status))
587 {
588 return NULL;
589 }
590 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
591 return result;
592 }
593 static struct SResource *
594 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
595 {
596 struct UString *tokenValue;
597 struct SResource *result = NULL;
598
599 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
600 {
601 return parseUCARules(tag, startline, status);
602 }*/
603 if(isVerbose()){
604 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
605 }
606 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
607
608 if (U_SUCCESS(*status))
609 {
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
612
613 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
614 if(U_SUCCESS(*status) && result) {
615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
616
617 if (U_FAILURE(*status))
618 {
619 res_close(result);
620 return NULL;
621 }
622 }
623 }
624
625 return result;
626 }
627
628 static struct SResource *
629 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
630 {
631 struct UString *tokenValue;
632 struct SResource *result = NULL;
633
634 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
635
636 if(isVerbose()){
637 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
638 }
639
640 if (U_SUCCESS(*status))
641 {
642 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
643 doesn't survive expect either) */
644
645 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
646
647 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
648
649 if (U_FAILURE(*status))
650 {
651 res_close(result);
652 return NULL;
653 }
654 }
655
656 return result;
657 }
658
659 #if !UCONFIG_NO_COLLATION
660
661 namespace {
662
663 static struct SResource* resLookup(struct SResource* res, const char* key){
664 if (res == res_none() || !res->isTable()) {
665 return NULL;
666 }
667
668 TableResource *list = static_cast<TableResource *>(res);
669 SResource *current = list->fFirst;
670 while (current != NULL) {
671 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
672 return current;
673 }
674 current = current->fNext;
675 }
676 return NULL;
677 }
678
/*
 * CollationRuleParser importer used by genrb: resolves an import of another
 * locale's collation rules by parsing that locale's .txt resource file.
 */
class GenrbImporter : public icu::CollationRuleParser::Importer {
public:
    /* in/out: input and output directories, stored as given (not copied) */
    GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
    virtual ~GenrbImporter();
    /* Sets `rules` to the "Sequence" string of collations/<collationType> in <localeID>.txt, if present. */
    virtual void getRules(
            const char *localeID, const char *collationType,
            UnicodeString &rules,
            const char *&errorReason, UErrorCode &errorCode);

private:
    const char *inputDir;   /* directory searched for the imported file; may be NULL */
    const char *outputDir;  /* forwarded to parse() */
};

GenrbImporter::~GenrbImporter() {}
694
695 void
696 GenrbImporter::getRules(
697 const char *localeID, const char *collationType,
698 UnicodeString &rules,
699 const char *& /*errorReason*/, UErrorCode &errorCode) {
700 CharString filename(localeID, errorCode);
701 for(int32_t i = 0; i < filename.length(); i++){
702 if(filename[i] == '-'){
703 filename.data()[i] = '_';
704 }
705 }
706 filename.append(".txt", errorCode);
707 if (U_FAILURE(errorCode)) {
708 return;
709 }
710 CharString inputDirBuf;
711 CharString openFileName;
712 if(inputDir == NULL) {
713 const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
714 if (filenameBegin != NULL) {
715 /*
716 * When a filename ../../../data/root.txt is specified,
717 * we presume that the input directory is ../../../data
718 * This is very important when the resource file includes
719 * another file, like UCARules.txt or thaidict.brk.
720 */
721 StringPiece dir = filename.toStringPiece();
722 const char *filenameLimit = filename.data() + filename.length();
723 dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
724 inputDirBuf.append(dir, errorCode);
725 inputDir = inputDirBuf.data();
726 }
727 }else{
728 int32_t dirlen = (int32_t)uprv_strlen(inputDir);
729
730 if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
731 /*
732 * append the input dir to openFileName if the first char in
733 * filename is not file separator char and the last char input directory is not '.'.
734 * This is to support :
735 * genrb -s. /home/icu/data
736 * genrb -s. icu/data
737 * The user cannot mix notations like
738 * genrb -s. /icu/data --- the absolute path specified. -s redundant
739 * user should use
740 * genrb -s. icu/data --- start from CWD and look in icu/data dir
741 */
742 openFileName.append(inputDir, dirlen, errorCode);
743 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
744 openFileName.append(U_FILE_SEP_CHAR, errorCode);
745 }
746 }
747 }
748 openFileName.append(filename, errorCode);
749 if(U_FAILURE(errorCode)) {
750 return;
751 }
752 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
753 const char* cp = "";
754 LocalUCHARBUFPointer ucbuf(
755 ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
756 if(errorCode == U_FILE_ACCESS_ERROR) {
757 fprintf(stderr, "couldn't open file %s\n", openFileName.data());
758 return;
759 }
760 if (ucbuf.isNull() || U_FAILURE(errorCode)) {
761 fprintf(stderr, "An error occurred processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
762 return;
763 }
764
765 /* Parse the data into an SRBRoot */
766 LocalPointer<SRBRoot> data(
767 parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode));
768 if (U_FAILURE(errorCode)) {
769 return;
770 }
771
772 struct SResource *root = data->fRoot;
773 struct SResource *collations = resLookup(root, "collations");
774 if (collations != NULL) {
775 struct SResource *collation = resLookup(collations, collationType);
776 if (collation != NULL) {
777 struct SResource *sequence = resLookup(collation, "Sequence");
778 if (sequence != NULL && sequence->isString()) {
779 // No string pointer aliasing so that we need not hold onto the resource bundle.
780 StringResource *sr = static_cast<StringResource *>(sequence);
781 rules = sr->fString;
782 }
783 }
784 }
785 }
786
787 // Quick-and-dirty escaping function.
788 // Assumes that we are on an ASCII-based platform.
789 static void
790 escape(const UChar *s, char *buffer) {
791 int32_t length = u_strlen(s);
792 int32_t i = 0;
793 for (;;) {
794 UChar32 c;
795 U16_NEXT(s, i, length, c);
796 if (c == 0) {
797 *buffer = 0;
798 return;
799 } else if (0x20 <= c && c <= 0x7e) {
800 // printable ASCII
801 *buffer++ = (char)c; // assumes ASCII-based platform
802 } else {
803 buffer += sprintf(buffer, "\\u%04X", (int)c);
804 }
805 }
806 }
807
808 } // namespace
809
810 #endif // !UCONFIG_NO_COLLATION
811
812 static TableResource *
813 addCollation(ParseState* state, TableResource *result, const char *collationType,
814 uint32_t startline, UErrorCode *status)
815 {
816 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
817 struct SResource *member = NULL;
818 struct UString *tokenValue;
819 struct UString comment;
820 enum ETokenType token;
821 char subtag[1024];
822 UnicodeString rules;
823 UBool haveRules = FALSE;
824 UVersionInfo version;
825 uint32_t line;
826
827 /* '{' . (name resource)* '}' */
828 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
829
830 for (;;)
831 {
832 ustr_init(&comment);
833 token = getToken(state, &tokenValue, &comment, &line, status);
834
835 if (token == TOK_CLOSE_BRACE)
836 {
837 break;
838 }
839
840 if (token != TOK_STRING)
841 {
842 res_close(result);
843 *status = U_INVALID_FORMAT_ERROR;
844
845 if (token == TOK_EOF)
846 {
847 error(startline, "unterminated table");
848 }
849 else
850 {
851 error(line, "Unexpected token %s", tokenNames[token]);
852 }
853
854 return NULL;
855 }
856
857 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
858
859 if (U_FAILURE(*status))
860 {
861 res_close(result);
862 return NULL;
863 }
864
865 member = parseResource(state, subtag, NULL, status);
866
867 if (U_FAILURE(*status))
868 {
869 res_close(result);
870 return NULL;
871 }
872 if (result == NULL)
873 {
874 // Ignore the parsed resources, continue parsing.
875 }
876 else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
877 {
878 StringResource *sr = static_cast<StringResource *>(member);
879 char ver[40];
880 int32_t length = sr->length();
881
882 if (length >= UPRV_LENGTHOF(ver))
883 {
884 length = UPRV_LENGTHOF(ver) - 1;
885 }
886
887 sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
888 u_versionFromString(version, ver);
889
890 result->add(member, line, *status);
891 member = NULL;
892 }
893 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
894 {
895 /* discard duplicate %%CollationBin if any*/
896 }
897 else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
898 {
899 StringResource *sr = static_cast<StringResource *>(member);
900 rules = sr->fString;
901 haveRules = TRUE;
902 // Defer building the collator until we have seen
903 // all sub-elements of the collation table, including the Version.
904 /* in order to achieve smaller data files, we can direct genrb */
905 /* to omit collation rules */
906 if(!state->omitCollationRules) {
907 result->add(member, line, *status);
908 member = NULL;
909 }
910 }
911 else // Just copy non-special items.
912 {
913 result->add(member, line, *status);
914 member = NULL;
915 }
916 res_close(member); // TODO: use LocalPointer
917 if (U_FAILURE(*status))
918 {
919 res_close(result);
920 return NULL;
921 }
922 }
923
924 if (!haveRules) { return result; }
925
926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
927 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
928 (void)collationType;
929 #else
930 // CLDR ticket #3949, ICU ticket #8082:
931 // Do not build collation binary data for for-import-only "private" collation rule strings.
932 if (uprv_strncmp(collationType, "private-", 8) == 0) {
933 if(isVerbose()) {
934 printf("Not building %s~%s collation binary\n", state->filename, collationType);
935 }
936 return result;
937 }
938
939 if(!state->makeBinaryCollation) {
940 if(isVerbose()) {
941 printf("Not building %s~%s collation binary\n", state->filename, collationType);
942 }
943 return result;
944 }
945 UErrorCode intStatus = U_ZERO_ERROR;
946 UParseError parseError;
947 uprv_memset(&parseError, 0, sizeof(parseError));
948 GenrbImporter importer(state->inputdir, state->outputdir);
949 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
950 if(U_FAILURE(intStatus)) {
951 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
952 res_close(result);
953 return NULL; // TODO: use LocalUResourceBundlePointer for result
954 }
955 icu::CollationBuilder builder(base, intStatus);
956 if(uprv_strncmp(collationType, "search", 6) == 0) {
957 builder.disableFastLatin(); // build fast-Latin table unless search collator
958 }
959 LocalPointer<icu::CollationTailoring> t(
960 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
961 if(U_FAILURE(intStatus)) {
962 const char *reason = builder.getErrorReason();
963 if(reason == NULL) { reason = ""; }
964 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
965 state->filename, collationType,
966 (long)parseError.offset, u_errorName(intStatus), reason);
967 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
968 // Print pre- and post-context.
969 char preBuffer[100], postBuffer[100];
970 escape(parseError.preContext, preBuffer);
971 escape(parseError.postContext, postBuffer);
972 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
973 }
974 if(isStrict() || t.isNull()) {
975 *status = intStatus;
976 res_close(result);
977 return NULL;
978 }
979 }
980 icu::LocalMemory<uint8_t> buffer;
981 int32_t capacity = 100000;
982 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
983 if(dest == NULL) {
984 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
985 (long)capacity);
986 *status = U_MEMORY_ALLOCATION_ERROR;
987 res_close(result);
988 return NULL;
989 }
990 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
991 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
992 *t, *t->settings, indexes, dest, capacity, intStatus);
993 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
994 intStatus = U_ZERO_ERROR;
995 capacity = totalSize;
996 dest = buffer.allocateInsteadAndCopy(capacity);
997 if(dest == NULL) {
998 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
999 (long)capacity);
1000 *status = U_MEMORY_ALLOCATION_ERROR;
1001 res_close(result);
1002 return NULL;
1003 }
1004 totalSize = icu::CollationDataWriter::writeTailoring(
1005 *t, *t->settings, indexes, dest, capacity, intStatus);
1006 }
1007 if(U_FAILURE(intStatus)) {
1008 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1009 u_errorName(intStatus));
1010 res_close(result);
1011 return NULL;
1012 }
1013 if(isVerbose()) {
1014 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1015 icu::CollationInfo::printSizes(totalSize, indexes);
1016 if(t->settings->hasReordering()) {
1017 printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1018 icu::CollationInfo::printReorderRanges(
1019 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1020 }
1021 }
1022 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1023 result->add(collationBin, line, *status);
1024 if (U_FAILURE(*status)) {
1025 res_close(result);
1026 return NULL;
1027 }
1028 #endif
1029 return result;
1030 }
1031
/*
 * Filter hook deciding whether a collation type is written to the output.
 * Currently every type is kept; the type argument is ignored.
 */
static UBool
keepCollationType(const char * /*type*/) {
    return TRUE;
}
1036
1037 static struct SResource *
1038 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1039 {
1040 TableResource *result = NULL;
1041 struct SResource *member = NULL;
1042 struct UString *tokenValue;
1043 struct UString comment;
1044 enum ETokenType token;
1045 char subtag[1024], typeKeyword[1024];
1046 uint32_t line;
1047
1048 result = table_open(state->bundle, tag, NULL, status);
1049
1050 if (result == NULL || U_FAILURE(*status))
1051 {
1052 return NULL;
1053 }
1054 if(isVerbose()){
1055 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1056 }
1057 if(!newCollation) {
1058 return addCollation(state, result, "(no type)", startline, status);
1059 }
1060 else {
1061 for(;;) {
1062 ustr_init(&comment);
1063 token = getToken(state, &tokenValue, &comment, &line, status);
1064
1065 if (token == TOK_CLOSE_BRACE)
1066 {
1067 return result;
1068 }
1069
1070 if (token != TOK_STRING)
1071 {
1072 res_close(result);
1073 *status = U_INVALID_FORMAT_ERROR;
1074
1075 if (token == TOK_EOF)
1076 {
1077 error(startline, "unterminated table");
1078 }
1079 else
1080 {
1081 error(line, "Unexpected token %s", tokenNames[token]);
1082 }
1083
1084 return NULL;
1085 }
1086
1087 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1088
1089 if (U_FAILURE(*status))
1090 {
1091 res_close(result);
1092 return NULL;
1093 }
1094
1095 if (uprv_strcmp(subtag, "default") == 0)
1096 {
1097 member = parseResource(state, subtag, NULL, status);
1098
1099 if (U_FAILURE(*status))
1100 {
1101 res_close(result);
1102 return NULL;
1103 }
1104
1105 result->add(member, line, *status);
1106 }
1107 else
1108 {
1109 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1110 /* this probably needs to be refactored or recursively use the parser */
1111 /* first we assume that our collation table won't have the explicit type */
1112 /* then, we cannot handle aliases */
1113 if(token == TOK_OPEN_BRACE) {
1114 token = getToken(state, &tokenValue, &comment, &line, status);
1115 TableResource *collationRes;
1116 if (keepCollationType(subtag)) {
1117 collationRes = table_open(state->bundle, subtag, NULL, status);
1118 } else {
1119 collationRes = NULL;
1120 }
1121 // need to parse the collation data regardless
1122 collationRes = addCollation(state, collationRes, subtag, startline, status);
1123 if (collationRes != NULL) {
1124 result->add(collationRes, startline, *status);
1125 }
1126 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1127 /* we could have a table too */
1128 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1129 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1130 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1131 member = parseResource(state, subtag, NULL, status);
1132 if (U_FAILURE(*status))
1133 {
1134 res_close(result);
1135 return NULL;
1136 }
1137
1138 result->add(member, line, *status);
1139 } else {
1140 res_close(result);
1141 *status = U_INVALID_FORMAT_ERROR;
1142 return NULL;
1143 }
1144 } else {
1145 res_close(result);
1146 *status = U_INVALID_FORMAT_ERROR;
1147 return NULL;
1148 }
1149 }
1150
1151 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1152
1153 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1154
1155 if (U_FAILURE(*status))
1156 {
1157 res_close(result);
1158 return NULL;
1159 }
1160 }
1161 }
1162 }
1163
1164 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1165 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1166 static struct SResource *
1167 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
1168 {
1169 struct SResource *member = NULL;
1170 struct UString *tokenValue=NULL;
1171 struct UString comment;
1172 enum ETokenType token;
1173 char subtag[1024];
1174 uint32_t line;
1175 UBool readToken = FALSE;
1176
1177 /* '{' . (name resource)* '}' */
1178
1179 if(isVerbose()){
1180 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1181 }
1182 for (;;)
1183 {
1184 ustr_init(&comment);
1185 token = getToken(state, &tokenValue, &comment, &line, status);
1186
1187 if (token == TOK_CLOSE_BRACE)
1188 {
1189 if (!readToken) {
1190 warning(startline, "Encountered empty table");
1191 }
1192 return table;
1193 }
1194
1195 if (token != TOK_STRING)
1196 {
1197 *status = U_INVALID_FORMAT_ERROR;
1198
1199 if (token == TOK_EOF)
1200 {
1201 error(startline, "unterminated table");
1202 }
1203 else
1204 {
1205 error(line, "unexpected token %s", tokenNames[token]);
1206 }
1207
1208 return NULL;
1209 }
1210
1211 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1212 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1213 } else {
1214 *status = U_INVALID_FORMAT_ERROR;
1215 error(line, "invariant characters required for table keys");
1216 return NULL;
1217 }
1218
1219 if (U_FAILURE(*status))
1220 {
1221 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1222 return NULL;
1223 }
1224
1225 member = parseResource(state, subtag, &comment, status);
1226
1227 if (member == NULL || U_FAILURE(*status))
1228 {
1229 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1230 return NULL;
1231 }
1232
1233 table->add(member, line, *status);
1234
1235 if (U_FAILURE(*status))
1236 {
1237 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1238 return NULL;
1239 }
1240 readToken = TRUE;
1241 ustr_deinit(&comment);
1242 }
1243
1244 /* not reached */
1245 /* A compiler warning will appear if all paths don't contain a return statement. */
1246 /* *status = U_INTERNAL_PROGRAM_ERROR;
1247 return NULL;*/
1248 }
1249
1250 static struct SResource *
1251 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1252 {
1253 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1254 {
1255 return parseCollationElements(state, tag, startline, FALSE, status);
1256 }
1257 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1258 {
1259 return parseCollationElements(state, tag, startline, TRUE, status);
1260 }
1261 if(isVerbose()){
1262 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1263 }
1264
1265 TableResource *result = table_open(state->bundle, tag, comment, status);
1266
1267 if (result == NULL || U_FAILURE(*status))
1268 {
1269 return NULL;
1270 }
1271 return realParseTable(state, result, tag, startline, status);
1272 }
1273
1274 static struct SResource *
1275 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1276 {
1277 struct SResource *member = NULL;
1278 struct UString *tokenValue;
1279 struct UString memberComments;
1280 enum ETokenType token;
1281 UBool readToken = FALSE;
1282
1283 ArrayResource *result = array_open(state->bundle, tag, comment, status);
1284
1285 if (result == NULL || U_FAILURE(*status))
1286 {
1287 return NULL;
1288 }
1289 if(isVerbose()){
1290 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1291 }
1292
1293 ustr_init(&memberComments);
1294
1295 /* '{' . resource [','] '}' */
1296 for (;;)
1297 {
1298 /* reset length */
1299 ustr_setlen(&memberComments, 0, status);
1300
1301 /* check for end of array, but don't consume next token unless it really is the end */
1302 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1303
1304
1305 if (token == TOK_CLOSE_BRACE)
1306 {
1307 getToken(state, NULL, NULL, NULL, status);
1308 if (!readToken) {
1309 warning(startline, "Encountered empty array");
1310 }
1311 break;
1312 }
1313
1314 if (token == TOK_EOF)
1315 {
1316 res_close(result);
1317 *status = U_INVALID_FORMAT_ERROR;
1318 error(startline, "unterminated array");
1319 return NULL;
1320 }
1321
1322 /* string arrays are a special case */
1323 if (token == TOK_STRING)
1324 {
1325 getToken(state, &tokenValue, &memberComments, NULL, status);
1326 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1327 }
1328 else
1329 {
1330 member = parseResource(state, NULL, &memberComments, status);
1331 }
1332
1333 if (member == NULL || U_FAILURE(*status))
1334 {
1335 res_close(result);
1336 return NULL;
1337 }
1338
1339 result->add(member);
1340
1341 /* eat optional comma if present */
1342 token = peekToken(state, 0, NULL, NULL, NULL, status);
1343
1344 if (token == TOK_COMMA)
1345 {
1346 getToken(state, NULL, NULL, NULL, status);
1347 }
1348
1349 if (U_FAILURE(*status))
1350 {
1351 res_close(result);
1352 return NULL;
1353 }
1354 readToken = TRUE;
1355 }
1356
1357 ustr_deinit(&memberComments);
1358 return result;
1359 }
1360
1361 static struct SResource *
1362 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1363 {
1364 enum ETokenType token;
1365 char *string;
1366 int32_t value;
1367 UBool readToken = FALSE;
1368 char *stopstring;
1369 uint32_t len;
1370 struct UString memberComments;
1371
1372 IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
1373
1374 if (result == NULL || U_FAILURE(*status))
1375 {
1376 return NULL;
1377 }
1378
1379 if(isVerbose()){
1380 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1381 }
1382 ustr_init(&memberComments);
1383 /* '{' . string [','] '}' */
1384 for (;;)
1385 {
1386 ustr_setlen(&memberComments, 0, status);
1387
1388 /* check for end of array, but don't consume next token unless it really is the end */
1389 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1390
1391 if (token == TOK_CLOSE_BRACE)
1392 {
1393 /* it's the end, consume the close brace */
1394 getToken(state, NULL, NULL, NULL, status);
1395 if (!readToken) {
1396 warning(startline, "Encountered empty int vector");
1397 }
1398 ustr_deinit(&memberComments);
1399 return result;
1400 }
1401
1402 string = getInvariantString(state, NULL, NULL, status);
1403
1404 if (U_FAILURE(*status))
1405 {
1406 res_close(result);
1407 return NULL;
1408 }
1409
1410 /* For handling illegal char in the Intvector */
1411 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1412 len=(uint32_t)(stopstring-string);
1413
1414 if(len==uprv_strlen(string))
1415 {
1416 result->add(value, *status);
1417 uprv_free(string);
1418 token = peekToken(state, 0, NULL, NULL, NULL, status);
1419 }
1420 else
1421 {
1422 uprv_free(string);
1423 *status=U_INVALID_CHAR_FOUND;
1424 }
1425
1426 if (U_FAILURE(*status))
1427 {
1428 res_close(result);
1429 return NULL;
1430 }
1431
1432 /* the comma is optional (even though it is required to prevent the reader from concatenating
1433 consecutive entries) so that a missing comma on the last entry isn't an error */
1434 if (token == TOK_COMMA)
1435 {
1436 getToken(state, NULL, NULL, NULL, status);
1437 }
1438 readToken = TRUE;
1439 }
1440
1441 /* not reached */
1442 /* A compiler warning will appear if all paths don't contain a return statement. */
1443 /* intvector_close(result, status);
1444 *status = U_INTERNAL_PROGRAM_ERROR;
1445 return NULL;*/
1446 }
1447
1448 static struct SResource *
1449 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1450 {
1451 uint32_t line;
1452 LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
1453 if (string.isNull() || U_FAILURE(*status))
1454 {
1455 return NULL;
1456 }
1457
1458 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1459 if (U_FAILURE(*status))
1460 {
1461 return NULL;
1462 }
1463
1464 if(isVerbose()){
1465 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1466 }
1467
1468 uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
1469 if (count > 0){
1470 if((count % 2)==0){
1471 LocalMemory<uint8_t> value;
1472 if (value.allocateInsteadAndCopy(count) == NULL)
1473 {
1474 *status = U_MEMORY_ALLOCATION_ERROR;
1475 return NULL;
1476 }
1477
1478 char toConv[3] = {'\0', '\0', '\0'};
1479 for (uint32_t i = 0; i < count; i += 2)
1480 {
1481 toConv[0] = string[i];
1482 toConv[1] = string[i + 1];
1483
1484 char *stopstring;
1485 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1486 uint32_t len=(uint32_t)(stopstring-toConv);
1487
1488 if(len!=2)
1489 {
1490 *status=U_INVALID_CHAR_FOUND;
1491 return NULL;
1492 }
1493 }
1494
1495 return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
1496 }
1497 else
1498 {
1499 *status = U_INVALID_CHAR_FOUND;
1500 error(line, "Encountered invalid binary value (length is odd)");
1501 return NULL;
1502 }
1503 }
1504 else
1505 {
1506 warning(startline, "Encountered empty binary value");
1507 return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
1508 }
1509 }
1510
1511 static struct SResource *
1512 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1513 {
1514 struct SResource *result = NULL;
1515 int32_t value;
1516 char *string;
1517 char *stopstring;
1518 uint32_t len;
1519
1520 string = getInvariantString(state, NULL, NULL, status);
1521
1522 if (string == NULL || U_FAILURE(*status))
1523 {
1524 return NULL;
1525 }
1526
1527 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1528
1529 if (U_FAILURE(*status))
1530 {
1531 uprv_free(string);
1532 return NULL;
1533 }
1534
1535 if(isVerbose()){
1536 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1537 }
1538
1539 if (uprv_strlen(string) <= 0)
1540 {
1541 warning(startline, "Encountered empty integer. Default value is 0.");
1542 }
1543
1544 /* Allow integer support for hexdecimal, octal digit and decimal*/
1545 /* and handle illegal char in the integer*/
1546 value = uprv_strtoul(string, &stopstring, 0);
1547 len=(uint32_t)(stopstring-string);
1548 if(len==uprv_strlen(string))
1549 {
1550 result = int_open(state->bundle, tag, value, comment, status);
1551 }
1552 else
1553 {
1554 *status=U_INVALID_CHAR_FOUND;
1555 }
1556 uprv_free(string);
1557
1558 return result;
1559 }
1560
1561 static struct SResource *
1562 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1563 {
1564 uint32_t line;
1565 LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
1566 if (U_FAILURE(*status))
1567 {
1568 return NULL;
1569 }
1570
1571 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1572
1573 if (U_FAILURE(*status))
1574 {
1575 return NULL;
1576 }
1577
1578 if(isVerbose()){
1579 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1580 }
1581
1582 /* Open the input file for reading */
1583 CharString fullname;
1584 if (state->inputdir != NULL) {
1585 fullname.append(state->inputdir, *status);
1586 }
1587 fullname.appendPathPart(filename.getAlias(), *status);
1588 if (U_FAILURE(*status)) {
1589 return NULL;
1590 }
1591
1592 FileStream *file = T_FileStream_open(fullname.data(), "rb");
1593 if (file == NULL)
1594 {
1595 error(line, "couldn't open input file %s", filename.getAlias());
1596 *status = U_FILE_ACCESS_ERROR;
1597 return NULL;
1598 }
1599
1600 int32_t len = T_FileStream_size(file);
1601 LocalMemory<uint8_t> data;
1602 if(data.allocateInsteadAndCopy(len) == NULL)
1603 {
1604 *status = U_MEMORY_ALLOCATION_ERROR;
1605 T_FileStream_close (file);
1606 return NULL;
1607 }
1608
1609 /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
1610 T_FileStream_close (file);
1611
1612 return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
1613 }
1614
1615 static struct SResource *
1616 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1617 {
1618 struct SResource *result;
1619 int32_t len=0;
1620 char *filename;
1621 uint32_t line;
1622 UChar *pTarget = NULL;
1623
1624 UCHARBUF *ucbuf;
1625 char *fullname = NULL;
1626 int32_t count = 0;
1627 const char* cp = NULL;
1628 const UChar* uBuffer = NULL;
1629
1630 filename = getInvariantString(state, &line, NULL, status);
1631 count = (int32_t)uprv_strlen(filename);
1632
1633 if (U_FAILURE(*status))
1634 {
1635 return NULL;
1636 }
1637
1638 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1639
1640 if (U_FAILURE(*status))
1641 {
1642 uprv_free(filename);
1643 return NULL;
1644 }
1645
1646 if(isVerbose()){
1647 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1648 }
1649
1650 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1651 /* test for NULL */
1652 if(fullname == NULL)
1653 {
1654 *status = U_MEMORY_ALLOCATION_ERROR;
1655 uprv_free(filename);
1656 return NULL;
1657 }
1658
1659 if(state->inputdir!=NULL){
1660 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1661 {
1662
1663 uprv_strcpy(fullname, state->inputdir);
1664
1665 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1666 fullname[state->inputdirLength + 1] = '\0';
1667
1668 uprv_strcat(fullname, filename);
1669 }
1670 else
1671 {
1672 uprv_strcpy(fullname, state->inputdir);
1673 uprv_strcat(fullname, filename);
1674 }
1675 }else{
1676 uprv_strcpy(fullname,filename);
1677 }
1678
1679 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1680
1681 if (U_FAILURE(*status)) {
1682 error(line, "couldn't open input file %s\n", filename);
1683 return NULL;
1684 }
1685
1686 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1687 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1688
1689 ucbuf_close(ucbuf);
1690
1691 uprv_free(pTarget);
1692
1693 uprv_free(filename);
1694 uprv_free(fullname);
1695
1696 return result;
1697 }
1698
1699
1700
1701
1702
/*
 * UChar strings for the resource type keywords that can follow ':' in a
 * resource declaration. Each declaration here has a matching U_STRING_INIT
 * in initParser() with the same text and length. parseResourceType() compares
 * the type token against these; k_type_int and k_type_bin are accepted there
 * as aliases for the integer and binary types.
 */
1703 U_STRING_DECL(k_type_string, "string", 6);
1704 U_STRING_DECL(k_type_binary, "binary", 6);
1705 U_STRING_DECL(k_type_bin, "bin", 3);
1706 U_STRING_DECL(k_type_table, "table", 5);
1707 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1708 U_STRING_DECL(k_type_int, "int", 3);
1709 U_STRING_DECL(k_type_integer, "integer", 7);
1710 U_STRING_DECL(k_type_array, "array", 5);
1711 U_STRING_DECL(k_type_alias, "alias", 5);
1712 U_STRING_DECL(k_type_intvector, "intvector", 9);
1713 U_STRING_DECL(k_type_import, "import", 6);
1714 U_STRING_DECL(k_type_include, "include", 7);
1715
1716 /* Various non-standard processing plugins that create one or more special resources. */
1717 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1718 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1719 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1720 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1721
/*
 * Resource types known to the parser. The enumerators are used as indexes
 * into gResourceTypes[], so the two lists must stay in the same order.
 * RESTYPE_UNKNOWN means "no explicit type"; parseResourceType() searches the
 * values strictly between RESTYPE_UNKNOWN and RESTYPE_RESERVED.
 */
1722 typedef enum EResourceType
1723 {
1724 RESTYPE_UNKNOWN,
1725 RESTYPE_STRING,
1726 RESTYPE_BINARY,
1727 RESTYPE_TABLE,
1728 RESTYPE_TABLE_NO_FALLBACK,
1729 RESTYPE_INTEGER,
1730 RESTYPE_ARRAY,
1731 RESTYPE_ALIAS,
1732 RESTYPE_INTVECTOR,
1733 RESTYPE_IMPORT,
1734 RESTYPE_INCLUDE,
1735 RESTYPE_PROCESS_UCA_RULES,
1736 RESTYPE_PROCESS_COLLATION,
1737 RESTYPE_PROCESS_TRANSLITERATOR,
1738 RESTYPE_PROCESS_DEPENDENCY,
1739 RESTYPE_RESERVED
1740 } EResourceType;
1741
/*
 * Per-type parser table, indexed by EResourceType (one row per enumerator, in
 * the same order). parseResourceType() matches the type token against
 * nameUChars; parseResource() calls parseFunction for the resolved type and
 * reports U_INTERNAL_PROGRAM_ERROR when that entry is NULL.
 */
1742 static struct {
1743 const char *nameChars; /* only used for debugging */
1744 const UChar *nameUChars;
1745 ParseResourceFunction *parseFunction;
1746 } gResourceTypes[] = {
1747 {"Unknown", NULL, NULL},
1748 {"string", k_type_string, parseString},
1749 {"binary", k_type_binary, parseBinary},
1750 {"table", k_type_table, parseTable},
1751 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1752 {"integer", k_type_integer, parseInteger},
1753 {"array", k_type_array, parseArray},
1754 {"alias", k_type_alias, parseAlias},
1755 {"intvector", k_type_intvector, parseIntVector},
1756 {"import", k_type_import, parseImport},
1757 {"include", k_type_include, parseInclude},
1758 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1759 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1760 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1761 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1762 {"reserved", NULL, NULL}
1763 };
1764
1765 void initParser()
1766 {
1767 U_STRING_INIT(k_type_string, "string", 6);
1768 U_STRING_INIT(k_type_binary, "binary", 6);
1769 U_STRING_INIT(k_type_bin, "bin", 3);
1770 U_STRING_INIT(k_type_table, "table", 5);
1771 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1772 U_STRING_INIT(k_type_int, "int", 3);
1773 U_STRING_INIT(k_type_integer, "integer", 7);
1774 U_STRING_INIT(k_type_array, "array", 5);
1775 U_STRING_INIT(k_type_alias, "alias", 5);
1776 U_STRING_INIT(k_type_intvector, "intvector", 9);
1777 U_STRING_INIT(k_type_import, "import", 6);
1778 U_STRING_INIT(k_type_include, "include", 7);
1779
1780 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1781 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1782 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1783 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1784 }
1785
1786 static inline UBool isTable(enum EResourceType type) {
1787 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1788 }
1789
1790 static enum EResourceType
1791 parseResourceType(ParseState* state, UErrorCode *status)
1792 {
1793 struct UString *tokenValue;
1794 struct UString comment;
1795 enum EResourceType result = RESTYPE_UNKNOWN;
1796 uint32_t line=0;
1797 ustr_init(&comment);
1798 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1799
1800 if (U_FAILURE(*status))
1801 {
1802 return RESTYPE_UNKNOWN;
1803 }
1804
1805 *status = U_ZERO_ERROR;
1806
1807 /* Search for normal types */
1808 result=RESTYPE_UNKNOWN;
1809 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1810 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1811 break;
1812 }
1813 }
1814 /* Now search for the aliases */
1815 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1816 result = RESTYPE_INTEGER;
1817 }
1818 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1819 result = RESTYPE_BINARY;
1820 }
1821 else if (result == RESTYPE_RESERVED) {
1822 char tokenBuffer[1024];
1823 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1824 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1825 *status = U_INVALID_FORMAT_ERROR;
1826 error(line, "unknown resource type '%s'", tokenBuffer);
1827 }
1828
1829 return result;
1830 }
1831
1832 /* parse a non-top-level resource */
1833 static struct SResource *
1834 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1835 {
1836 enum ETokenType token;
1837 enum EResourceType resType = RESTYPE_UNKNOWN;
1838 ParseResourceFunction *parseFunction = NULL;
1839 struct UString *tokenValue;
1840 uint32_t startline;
1841 uint32_t line;
1842
1843
1844 token = getToken(state, &tokenValue, NULL, &startline, status);
1845
1846 if(isVerbose()){
1847 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1848 }
1849
1850 /* name . [ ':' type ] '{' resource '}' */
1851 /* This function parses from the colon onwards. If the colon is present, parse the
1852 type then try to parse a resource of that type. If there is no explicit type,
1853 work it out using the lookahead tokens. */
1854 switch (token)
1855 {
1856 case TOK_EOF:
1857 *status = U_INVALID_FORMAT_ERROR;
1858 error(startline, "Unexpected EOF encountered");
1859 return NULL;
1860
1861 case TOK_ERROR:
1862 *status = U_INVALID_FORMAT_ERROR;
1863 return NULL;
1864
1865 case TOK_COLON:
1866 resType = parseResourceType(state, status);
1867 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1868
1869 if (U_FAILURE(*status))
1870 {
1871 return NULL;
1872 }
1873
1874 break;
1875
1876 case TOK_OPEN_BRACE:
1877 break;
1878
1879 default:
1880 *status = U_INVALID_FORMAT_ERROR;
1881 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1882 return NULL;
1883 }
1884
1885
1886 if (resType == RESTYPE_UNKNOWN)
1887 {
1888 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1889 We could have any of the following:
1890 { { => array (nested)
1891 { :/} => array
1892 { string , => string array
1893
1894 { string { => table
1895
1896 { string :/{ => table
1897 { string } => string
1898 */
1899
1900 token = peekToken(state, 0, NULL, &line, NULL,status);
1901
1902 if (U_FAILURE(*status))
1903 {
1904 return NULL;
1905 }
1906
1907 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1908 {
1909 resType = RESTYPE_ARRAY;
1910 }
1911 else if (token == TOK_STRING)
1912 {
1913 token = peekToken(state, 1, NULL, &line, NULL, status);
1914
1915 if (U_FAILURE(*status))
1916 {
1917 return NULL;
1918 }
1919
1920 switch (token)
1921 {
1922 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
1923 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
1924 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
1925 case TOK_COLON: resType = RESTYPE_TABLE; break;
1926 default:
1927 *status = U_INVALID_FORMAT_ERROR;
1928 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1929 return NULL;
1930 }
1931 }
1932 else
1933 {
1934 *status = U_INVALID_FORMAT_ERROR;
1935 error(line, "Unexpected token after '{'");
1936 return NULL;
1937 }
1938
1939 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1940 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
1941 *status = U_INVALID_FORMAT_ERROR;
1942 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1943 return NULL;
1944 }
1945
1946
1947 /* We should now know what we need to parse next, so call the appropriate parser
1948 function and return. */
1949 parseFunction = gResourceTypes[resType].parseFunction;
1950 if (parseFunction != NULL) {
1951 return parseFunction(state, tag, startline, comment, status);
1952 }
1953 else {
1954 *status = U_INTERNAL_PROGRAM_ERROR;
1955 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1956 }
1957
1958 return NULL;
1959 }
1960
1961 /* parse the top-level resource */
1962 struct SRBRoot *
1963 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
1964 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
1965 {
1966 struct UString *tokenValue;
1967 struct UString comment;
1968 uint32_t line;
1969 enum EResourceType bundleType;
1970 enum ETokenType token;
1971 ParseState state;
1972 uint32_t i;
1973
1974
1975 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1976 {
1977 ustr_init(&state.lookahead[i].value);
1978 ustr_init(&state.lookahead[i].comment);
1979 }
1980
1981 initLookahead(&state, buf, status);
1982
1983 state.inputdir = inputDir;
1984 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
1985 state.outputdir = outputDir;
1986 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
1987 state.filename = filename;
1988 state.makeBinaryCollation = makeBinaryCollation;
1989 state.omitCollationRules = omitCollationRules;
1990
1991 ustr_init(&comment);
1992 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
1993
1994 state.bundle = new SRBRoot(&comment, FALSE, *status);
1995
1996 if (state.bundle == NULL || U_FAILURE(*status))
1997 {
1998 return NULL;
1999 }
2000
2001
2002 state.bundle->setLocale(tokenValue->fChars, *status);
2003
2004 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2005 token = getToken(&state, NULL, NULL, &line, status);
2006 if(token==TOK_COLON) {
2007 *status=U_ZERO_ERROR;
2008 bundleType=parseResourceType(&state, status);
2009
2010 if(isTable(bundleType))
2011 {
2012 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2013 }
2014 else
2015 {
2016 *status=U_PARSE_ERROR;
2017 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2018 }
2019 }
2020 else
2021 {
2022 /* not a colon */
2023 if(token==TOK_OPEN_BRACE)
2024 {
2025 *status=U_ZERO_ERROR;
2026 bundleType=RESTYPE_TABLE;
2027 }
2028 else
2029 {
2030 /* neither colon nor open brace */
2031 *status=U_PARSE_ERROR;
2032 bundleType=RESTYPE_UNKNOWN;
2033 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2034 }
2035 }
2036
2037 if (U_FAILURE(*status))
2038 {
2039 delete state.bundle;
2040 return NULL;
2041 }
2042
2043 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2044 /*
2045 * Parse a top-level table with the table(nofallback) declaration.
2046 * This is the same as a regular table, but also sets the
2047 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2048 */
2049 state.bundle->fNoFallback=TRUE;
2050 }
2051 /* top-level tables need not handle special table names like "collations" */
2052 assert(!state.bundle->fIsPoolBundle);
2053 assert(state.bundle->fRoot->fType == URES_TABLE);
2054 TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
2055 realParseTable(&state, rootTable, NULL, line, status);
2056 if(dependencyArray!=NULL){
2057 rootTable->add(dependencyArray, 0, *status);
2058 dependencyArray = NULL;
2059 }
2060 if (U_FAILURE(*status))
2061 {
2062 delete state.bundle;
2063 res_close(dependencyArray);
2064 return NULL;
2065 }
2066
2067 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2068 {
2069 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2070 if(isStrict()){
2071 *status = U_INVALID_FORMAT_ERROR;
2072 return NULL;
2073 }
2074 }
2075
2076 cleanupLookahead(&state);
2077 ustr_deinit(&comment);
2078 return state.bundle;
2079 }