]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/parse.cpp
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
b331163b 4* Copyright (C) 1998-2015, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
4388f060 9* File parse.cpp
b75a7d8f
A
10*
11* Modification History:
12*
13* Date Name Description
14* 05/26/99 stephen Creation.
15* 02/25/00 weiv Overhaul to write udata
16* 5/10/01 Ram removed ustdio dependency
17* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
57a6839d
A
21// Safer use of UnicodeString.
22#ifndef UNISTR_FROM_CHAR_EXPLICIT
23# define UNISTR_FROM_CHAR_EXPLICIT explicit
24#endif
25
26// Less important, but still a good idea.
27#ifndef UNISTR_FROM_STRING_EXPLICIT
28# define UNISTR_FROM_STRING_EXPLICIT explicit
29#endif
30
2ca993e8 31#include <assert.h>
b75a7d8f
A
32#include "parse.h"
33#include "errmsg.h"
34#include "uhash.h"
35#include "cmemory.h"
36#include "cstring.h"
374ca955 37#include "uinvchar.h"
b75a7d8f
A
38#include "read.h"
39#include "ustr.h"
40#include "reslist.h"
73c04bcf 41#include "rbt_pars.h"
729e4ab9 42#include "genrb.h"
2ca993e8
A
43#include "unicode/stringpiece.h"
44#include "unicode/unistr.h"
b75a7d8f 45#include "unicode/ustring.h"
729e4ab9 46#include "unicode/uscript.h"
57a6839d 47#include "unicode/utf16.h"
b75a7d8f 48#include "unicode/putil.h"
2ca993e8 49#include "charstr.h"
57a6839d
A
50#include "collationbuilder.h"
51#include "collationdata.h"
52#include "collationdatareader.h"
53#include "collationdatawriter.h"
54#include "collationfastlatinbuilder.h"
55#include "collationinfo.h"
56#include "collationroot.h"
57#include "collationruleparser.h"
58#include "collationtailoring.h"
73c04bcf 59#include <stdio.h>
b75a7d8f
A
60
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code points that the rule-file readers below test for explicitly. */
#define CR              0x000D  /* carriage return */
#define LF              0x000A  /* line feed */
#define SPACE           0x0020
#define TAB             0x0009
#define ESCAPE          0x005C  /* backslash */
#define HASH            0x0023  /* '#' */
#define QUOTE           0x0027  /* apostrophe */
#define ZERO            0x0030  /* digit zero */

/* '[' and ']' under two pairs of names. */
#define STARTCOMMAND    0x005B
#define ENDCOMMAND      0x005D
#define OPENSQBRACKET   0x005B
#define CLOSESQBRACKET  0x005D
b75a7d8f 76
2ca993e8
A
77using icu::CharString;
78using icu::LocalMemory;
57a6839d 79using icu::LocalPointer;
2ca993e8
A
80using icu::LocalUCHARBUFPointer;
81using icu::StringPiece;
57a6839d
A
82using icu::UnicodeString;
83
b75a7d8f
A
84struct Lookahead
85{
86 enum ETokenType type;
374ca955
A
87 struct UString value;
88 struct UString comment;
89 uint32_t line;
b75a7d8f
A
90};
91
92/* keep in sync with token defines in read.h */
374ca955 93const char *tokenNames[TOK_TOKEN_COUNT] =
b75a7d8f
A
94{
95 "string", /* A string token, such as "MonthNames" */
96 "'{'", /* An opening brace character */
97 "'}'", /* A closing brace character */
98 "','", /* A comma */
99 "':'", /* A colon */
100
101 "<end of file>", /* End of the file has been reached successfully */
374ca955 102 "<end of line>"
b75a7d8f
A
103};
104
105/* Just to store "TRUE" */
51004dcb 106//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
b75a7d8f 107
729e4ab9
A
108typedef struct {
109 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
110 uint32_t lookaheadPosition;
111 UCHARBUF *buffer;
112 struct SRBRoot *bundle;
113 const char *inputdir;
114 uint32_t inputdirLength;
115 const char *outputdir;
116 uint32_t outputdirLength;
57a6839d 117 const char *filename;
4388f060 118 UBool makeBinaryCollation;
57a6839d 119 UBool omitCollationRules;
729e4ab9 120} ParseState;
b75a7d8f 121
729e4ab9
A
122typedef struct SResource *
123ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
124
125static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
b75a7d8f 126
b75a7d8f
A
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and
   the slot that was returned by the previous call is refilled with the next
   token from the reader by calling getNextToken.
   The token values are stored in the slots, which means that a token value
   does not survive the next call to getToken, i.e.

   UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL, NULL, NULL, status);       bad - value is now a different string
*/
140static void
729e4ab9 141initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
b75a7d8f
A
142{
143 static uint32_t initTypeStrings = 0;
144 uint32_t i;
145
146 if (!initTypeStrings)
147 {
148 initTypeStrings = 1;
149 }
150
729e4ab9
A
151 state->lookaheadPosition = 0;
152 state->buffer = buf;
b75a7d8f
A
153
154 resetLineNumber();
155
156 for (i = 0; i < MAX_LOOKAHEAD; i++)
157 {
729e4ab9 158 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
159 if (U_FAILURE(*status))
160 {
161 return;
162 }
163 }
164
165 *status = U_ZERO_ERROR;
166}
167
46f4442e 168static void
729e4ab9 169cleanupLookahead(ParseState* state)
46f4442e
A
170{
171 uint32_t i;
51004dcb 172 for (i = 0; i <= MAX_LOOKAHEAD; i++)
46f4442e 173 {
729e4ab9
A
174 ustr_deinit(&state->lookahead[i].value);
175 ustr_deinit(&state->lookahead[i].comment);
46f4442e
A
176 }
177
178}
179
b75a7d8f 180static enum ETokenType
729e4ab9 181getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
182{
183 enum ETokenType result;
184 uint32_t i;
185
729e4ab9 186 result = state->lookahead[state->lookaheadPosition].type;
b75a7d8f
A
187
188 if (tokenValue != NULL)
189 {
729e4ab9 190 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
b75a7d8f
A
191 }
192
193 if (linenumber != NULL)
194 {
729e4ab9 195 *linenumber = state->lookahead[state->lookaheadPosition].line;
b75a7d8f
A
196 }
197
374ca955
A
198 if (comment != NULL)
199 {
729e4ab9 200 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
201 }
202
729e4ab9
A
203 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
204 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
205 ustr_setlen(&state->lookahead[i].comment, 0, status);
206 ustr_setlen(&state->lookahead[i].value, 0, status);
207 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
208
209 /* printf("getToken, returning %s\n", tokenNames[result]); */
210
211 return result;
212}
213
214static enum ETokenType
729e4ab9 215peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
b75a7d8f 216{
729e4ab9 217 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
b75a7d8f
A
218
219 if (U_FAILURE(*status))
220 {
221 return TOK_ERROR;
222 }
223
224 if (lookaheadCount >= MAX_LOOKAHEAD)
225 {
226 *status = U_INTERNAL_PROGRAM_ERROR;
227 return TOK_ERROR;
228 }
229
230 if (tokenValue != NULL)
231 {
729e4ab9 232 *tokenValue = &state->lookahead[i].value;
b75a7d8f
A
233 }
234
235 if (linenumber != NULL)
236 {
729e4ab9 237 *linenumber = state->lookahead[i].line;
b75a7d8f
A
238 }
239
374ca955 240 if(comment != NULL){
729e4ab9 241 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
242 }
243
729e4ab9 244 return state->lookahead[i].type;
b75a7d8f
A
245}
246
247static void
729e4ab9 248expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
249{
250 uint32_t line;
374ca955 251
729e4ab9 252 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
b75a7d8f 253
73c04bcf 254 if (linenumber != NULL)
b75a7d8f 255 {
73c04bcf 256 *linenumber = line;
b75a7d8f
A
257 }
258
73c04bcf 259 if (U_FAILURE(*status))
b75a7d8f 260 {
73c04bcf 261 return;
b75a7d8f
A
262 }
263
264 if (token != expectedToken)
265 {
266 *status = U_INVALID_FORMAT_ERROR;
267 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
268 }
46f4442e 269 else
b75a7d8f
A
270 {
271 *status = U_ZERO_ERROR;
272 }
273}
274
729e4ab9 275static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
b75a7d8f
A
276{
277 struct UString *tokenValue;
278 char *result;
279 uint32_t count;
280
729e4ab9 281 expect(state, TOK_STRING, &tokenValue, comment, line, status);
b75a7d8f
A
282
283 if (U_FAILURE(*status))
284 {
285 return NULL;
286 }
287
374ca955
A
288 count = u_strlen(tokenValue->fChars);
289 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
290 *status = U_INVALID_FORMAT_ERROR;
291 error(*line, "invariant characters required for table keys, binary data, etc.");
292 return NULL;
293 }
294
51004dcb 295 result = static_cast<char *>(uprv_malloc(count+1));
b75a7d8f
A
296
297 if (result == NULL)
298 {
299 *status = U_MEMORY_ALLOCATION_ERROR;
300 return NULL;
301 }
302
374ca955 303 u_UCharsToChars(tokenValue->fChars, result, count+1);
b75a7d8f
A
304 return result;
305}
306
b75a7d8f 307static struct SResource *
4388f060 308parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
b75a7d8f
A
309{
310 struct SResource *result = NULL;
311 struct UString *tokenValue;
312 FileStream *file = NULL;
313 char filename[256] = { '\0' };
314 char cs[128] = { '\0' };
315 uint32_t line;
b75a7d8f
A
316 UBool quoted = FALSE;
317 UCHARBUF *ucbuf=NULL;
318 UChar32 c = 0;
319 const char* cp = NULL;
320 UChar *pTarget = NULL;
321 UChar *target = NULL;
322 UChar *targetLimit = NULL;
323 int32_t size = 0;
324
729e4ab9 325 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
b75a7d8f
A
326
327 if(isVerbose()){
374ca955 328 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
329 }
330
331 if (U_FAILURE(*status))
332 {
333 return NULL;
334 }
335 /* make the filename including the directory */
729e4ab9 336 if (state->inputdir != NULL)
b75a7d8f 337 {
729e4ab9 338 uprv_strcat(filename, state->inputdir);
b75a7d8f 339
729e4ab9 340 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
341 {
342 uprv_strcat(filename, U_FILE_SEP_STRING);
343 }
344 }
345
346 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
347
729e4ab9 348 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
349
350 if (U_FAILURE(*status))
351 {
352 return NULL;
353 }
b75a7d8f
A
354 uprv_strcat(filename, cs);
355
57a6839d 356 if(state->omitCollationRules) {
46f4442e
A
357 return res_none();
358 }
b75a7d8f
A
359
360 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
361
362 if (U_FAILURE(*status)) {
363 error(line, "An error occured while opening the input file %s\n", filename);
364 return NULL;
365 }
366
367 /* We allocate more space than actually required
368 * since the actual size needed for storing UChars
369 * is not known in UTF-8 byte stream
370 */
73c04bcf 371 size = ucbuf_size(ucbuf) + 1;
b75a7d8f 372 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
374ca955 373 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
b75a7d8f
A
374 target = pTarget;
375 targetLimit = pTarget+size;
376
377 /* read the rules into the buffer */
378 while (target < targetLimit)
379 {
380 c = ucbuf_getc(ucbuf, status);
381 if(c == QUOTE) {
73c04bcf 382 quoted = (UBool)!quoted;
b75a7d8f
A
383 }
384 /* weiv (06/26/2002): adding the following:
385 * - preserving spaces in commands [...]
386 * - # comments until the end of line
387 */
374ca955 388 if (c == STARTCOMMAND && !quoted)
b75a7d8f 389 {
73c04bcf
A
390 /* preserve commands
391 * closing bracket will be handled by the
392 * append at the end of the loop
393 */
394 while(c != ENDCOMMAND) {
51004dcb 395 U_APPEND_CHAR32_ONLY(c, target);
73c04bcf
A
396 c = ucbuf_getc(ucbuf, status);
397 }
398 }
399 else if (c == HASH && !quoted) {
400 /* skip comments */
401 while(c != CR && c != LF) {
402 c = ucbuf_getc(ucbuf, status);
403 }
404 continue;
405 }
406 else if (c == ESCAPE)
b75a7d8f
A
407 {
408 c = unescape(ucbuf, status);
409
4388f060 410 if (c == (UChar32)U_ERR)
b75a7d8f
A
411 {
412 uprv_free(pTarget);
413 T_FileStream_close(file);
414 return NULL;
415 }
416 }
374ca955 417 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
b75a7d8f 418 {
73c04bcf
A
419 /* ignore spaces carriage returns
420 * and line feed unless in the form \uXXXX
b75a7d8f
A
421 */
422 continue;
423 }
424
425 /* Append UChar * after dissembling if c > 0xffff*/
4388f060 426 if (c != (UChar32)U_EOF)
b75a7d8f 427 {
51004dcb 428 U_APPEND_CHAR32_ONLY(c, target);
b75a7d8f
A
429 }
430 else
431 {
432 break;
433 }
434 }
435
374ca955
A
436 /* terminate the string */
437 if(target < targetLimit){
438 *target = 0x0000;
439 }
440
729e4ab9 441 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
374ca955 442
b75a7d8f
A
443
444 ucbuf_close(ucbuf);
445 uprv_free(pTarget);
446 T_FileStream_close(file);
447
448 return result;
449}
450
73c04bcf 451static struct SResource *
4388f060 452parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
73c04bcf
A
453{
454 struct SResource *result = NULL;
455 struct UString *tokenValue;
456 FileStream *file = NULL;
457 char filename[256] = { '\0' };
458 char cs[128] = { '\0' };
459 uint32_t line;
460 UCHARBUF *ucbuf=NULL;
461 const char* cp = NULL;
462 UChar *pTarget = NULL;
463 const UChar *pSource = NULL;
464 int32_t size = 0;
465
729e4ab9 466 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
467
468 if(isVerbose()){
469 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
470 }
471
472 if (U_FAILURE(*status))
473 {
474 return NULL;
475 }
476 /* make the filename including the directory */
729e4ab9 477 if (state->inputdir != NULL)
73c04bcf 478 {
729e4ab9 479 uprv_strcat(filename, state->inputdir);
73c04bcf 480
729e4ab9 481 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
482 {
483 uprv_strcat(filename, U_FILE_SEP_STRING);
484 }
485 }
486
487 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
488
729e4ab9 489 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
490
491 if (U_FAILURE(*status))
492 {
493 return NULL;
494 }
495 uprv_strcat(filename, cs);
496
497
498 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
499
500 if (U_FAILURE(*status)) {
501 error(line, "An error occured while opening the input file %s\n", filename);
502 return NULL;
503 }
504
505 /* We allocate more space than actually required
506 * since the actual size needed for storing UChars
507 * is not known in UTF-8 byte stream
508 */
509 pSource = ucbuf_getBuffer(ucbuf, &size, status);
510 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
511 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
512
513#if !UCONFIG_NO_TRANSLITERATION
514 size = utrans_stripRules(pSource, size, pTarget, status);
515#else
46f4442e 516 size = 0;
73c04bcf
A
517 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
518#endif
729e4ab9 519 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
73c04bcf
A
520
521 ucbuf_close(ucbuf);
522 uprv_free(pTarget);
523 T_FileStream_close(file);
524
525 return result;
526}
2ca993e8 527static ArrayResource* dependencyArray = NULL;
73c04bcf
A
528
529static struct SResource *
729e4ab9 530parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
73c04bcf
A
531{
532 struct SResource *result = NULL;
533 struct SResource *elem = NULL;
534 struct UString *tokenValue;
535 uint32_t line;
536 char filename[256] = { '\0' };
537 char cs[128] = { '\0' };
538
729e4ab9 539 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
540
541 if(isVerbose()){
542 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
543 }
544
545 if (U_FAILURE(*status))
546 {
547 return NULL;
548 }
549 /* make the filename including the directory */
729e4ab9 550 if (state->outputdir != NULL)
73c04bcf 551 {
729e4ab9 552 uprv_strcat(filename, state->outputdir);
73c04bcf 553
729e4ab9 554 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
555 {
556 uprv_strcat(filename, U_FILE_SEP_STRING);
557 }
558 }
559
560 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
561
562 if (U_FAILURE(*status))
563 {
564 return NULL;
565 }
566 uprv_strcat(filename, cs);
567 if(!T_FileStream_file_exists(filename)){
568 if(isStrict()){
569 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
570 }else{
571 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
572 }
573 }
574 if(dependencyArray==NULL){
729e4ab9 575 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
73c04bcf
A
576 }
577 if(tag!=NULL){
729e4ab9 578 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf 579 }
729e4ab9 580 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf 581
2ca993e8 582 dependencyArray->add(elem);
73c04bcf
A
583
584 if (U_FAILURE(*status))
585 {
586 return NULL;
587 }
729e4ab9 588 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
589 return result;
590}
b75a7d8f 591static struct SResource *
729e4ab9 592parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
593{
594 struct UString *tokenValue;
595 struct SResource *result = NULL;
596
73c04bcf 597/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
b75a7d8f
A
598 {
599 return parseUCARules(tag, startline, status);
73c04bcf 600 }*/
b75a7d8f 601 if(isVerbose()){
374ca955 602 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 603 }
729e4ab9 604 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
605
606 if (U_SUCCESS(*status))
607 {
608 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
609 doesn't survive expect either) */
610
729e4ab9 611 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 612 if(U_SUCCESS(*status) && result) {
729e4ab9 613 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f 614
46f4442e
A
615 if (U_FAILURE(*status))
616 {
617 res_close(result);
618 return NULL;
619 }
b75a7d8f
A
620 }
621 }
622
623 return result;
624}
625
626static struct SResource *
729e4ab9 627parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
628{
629 struct UString *tokenValue;
374ca955 630 struct SResource *result = NULL;
b75a7d8f 631
729e4ab9 632 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
633
634 if(isVerbose()){
374ca955 635 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
636 }
637
638 if (U_SUCCESS(*status))
639 {
640 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
641 doesn't survive expect either) */
642
729e4ab9 643 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 644
729e4ab9 645 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
646
647 if (U_FAILURE(*status))
648 {
46f4442e 649 res_close(result);
b75a7d8f
A
650 return NULL;
651 }
652 }
653
654 return result;
655}
656
57a6839d
A
657#if !UCONFIG_NO_COLLATION
658
659namespace {
729e4ab9
A
660
661static struct SResource* resLookup(struct SResource* res, const char* key){
2ca993e8 662 if (res == res_none() || !res->isTable()) {
729e4ab9
A
663 return NULL;
664 }
665
2ca993e8
A
666 TableResource *list = static_cast<TableResource *>(res);
667 SResource *current = list->fFirst;
729e4ab9
A
668 while (current != NULL) {
669 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
670 return current;
671 }
672 current = current->fNext;
673 }
674 return NULL;
675}
676
57a6839d
A
677class GenrbImporter : public icu::CollationRuleParser::Importer {
678public:
679 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
680 virtual ~GenrbImporter();
b331163b 681 virtual void getRules(
57a6839d 682 const char *localeID, const char *collationType,
b331163b 683 UnicodeString &rules,
57a6839d
A
684 const char *&errorReason, UErrorCode &errorCode);
685
686private:
687 const char *inputDir;
688 const char *outputDir;
57a6839d
A
689};
690
691GenrbImporter::~GenrbImporter() {}
692
b331163b 693void
57a6839d
A
694GenrbImporter::getRules(
695 const char *localeID, const char *collationType,
b331163b 696 UnicodeString &rules,
57a6839d 697 const char *& /*errorReason*/, UErrorCode &errorCode) {
2ca993e8
A
698 CharString filename(localeID, errorCode);
699 for(int32_t i = 0; i < filename.length(); i++){
729e4ab9 700 if(filename[i] == '-'){
2ca993e8 701 filename.data()[i] = '_';
729e4ab9
A
702 }
703 }
2ca993e8 704 filename.append(".txt", errorCode);
57a6839d 705 if (U_FAILURE(errorCode)) {
b331163b 706 return;
729e4ab9 707 }
2ca993e8
A
708 CharString inputDirBuf;
709 CharString openFileName;
57a6839d 710 if(inputDir == NULL) {
2ca993e8 711 const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
729e4ab9
A
712 if (filenameBegin != NULL) {
713 /*
714 * When a filename ../../../data/root.txt is specified,
715 * we presume that the input directory is ../../../data
716 * This is very important when the resource file includes
717 * another file, like UCARules.txt or thaidict.brk.
718 */
2ca993e8
A
719 StringPiece dir = filename.toStringPiece();
720 const char *filenameLimit = filename.data() + filename.length();
721 dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
722 inputDirBuf.append(dir, errorCode);
723 inputDir = inputDirBuf.data();
729e4ab9
A
724 }
725 }else{
2ca993e8 726 int32_t dirlen = (int32_t)uprv_strlen(inputDir);
729e4ab9 727
2ca993e8 728 if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
729e4ab9
A
729 /*
730 * append the input dir to openFileName if the first char in
2ca993e8 731 * filename is not file separator char and the last char input directory is not '.'.
729e4ab9
A
732 * This is to support :
733 * genrb -s. /home/icu/data
734 * genrb -s. icu/data
735 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant
737 * user should use
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir
739 */
2ca993e8
A
740 openFileName.append(inputDir, dirlen, errorCode);
741 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
742 openFileName.append(U_FILE_SEP_CHAR, errorCode);
729e4ab9 743 }
729e4ab9
A
744 }
745 }
2ca993e8
A
746 openFileName.append(filename, errorCode);
747 if(U_FAILURE(errorCode)) {
748 return;
749 }
750 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
751 const char* cp = "";
752 LocalUCHARBUFPointer ucbuf(
753 ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
57a6839d 754 if(errorCode == U_FILE_ACCESS_ERROR) {
2ca993e8
A
755 fprintf(stderr, "couldn't open file %s\n", openFileName.data());
756 return;
729e4ab9 757 }
2ca993e8
A
758 if (ucbuf.isNull() || U_FAILURE(errorCode)) {
759 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
760 return;
729e4ab9
A
761 }
762
763 /* Parse the data into an SRBRoot */
2ca993e8
A
764 struct SRBRoot *data =
765 parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode);
b331163b 766 if (U_FAILURE(errorCode)) {
2ca993e8 767 return;
b331163b 768 }
729e4ab9 769
2ca993e8
A
770 struct SResource *root = data->fRoot;
771 struct SResource *collations = resLookup(root, "collations");
4388f060 772 if (collations != NULL) {
2ca993e8 773 struct SResource *collation = resLookup(collations, collationType);
4388f060 774 if (collation != NULL) {
2ca993e8
A
775 struct SResource *sequence = resLookup(collation, "Sequence");
776 if (sequence != NULL && sequence->isString()) {
b331163b 777 // No string pointer aliasing so that we need not hold onto the resource bundle.
2ca993e8
A
778 StringResource *sr = static_cast<StringResource *>(sequence);
779 rules = sr->fString;
4388f060
A
780 }
781 }
782 }
729e4ab9
A
783}
784
4388f060
A
785// Quick-and-dirty escaping function.
786// Assumes that we are on an ASCII-based platform.
787static void
788escape(const UChar *s, char *buffer) {
789 int32_t length = u_strlen(s);
790 int32_t i = 0;
791 for (;;) {
792 UChar32 c;
793 U16_NEXT(s, i, length, c);
794 if (c == 0) {
795 *buffer = 0;
796 return;
797 } else if (0x20 <= c && c <= 0x7e) {
798 // printable ASCII
799 *buffer++ = (char)c; // assumes ASCII-based platform
800 } else {
801 buffer += sprintf(buffer, "\\u%04X", (int)c);
802 }
803 }
804}
805
57a6839d
A
806} // namespace
807
808#endif // !UCONFIG_NO_COLLATION
809
2ca993e8
A
810static TableResource *
811addCollation(ParseState* state, TableResource *result, const char *collationType,
57a6839d 812 uint32_t startline, UErrorCode *status)
b75a7d8f 813{
57a6839d 814 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
b75a7d8f
A
815 struct SResource *member = NULL;
816 struct UString *tokenValue;
374ca955 817 struct UString comment;
b75a7d8f
A
818 enum ETokenType token;
819 char subtag[1024];
57a6839d
A
820 UnicodeString rules;
821 UBool haveRules = FALSE;
b75a7d8f 822 UVersionInfo version;
b75a7d8f 823 uint32_t line;
57a6839d 824
b75a7d8f 825 /* '{' . (name resource)* '}' */
374ca955
A
826 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
827
b75a7d8f
A
828 for (;;)
829 {
374ca955 830 ustr_init(&comment);
729e4ab9 831 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
832
833 if (token == TOK_CLOSE_BRACE)
834 {
57a6839d 835 break;
b75a7d8f
A
836 }
837
838 if (token != TOK_STRING)
839 {
46f4442e 840 res_close(result);
b75a7d8f
A
841 *status = U_INVALID_FORMAT_ERROR;
842
843 if (token == TOK_EOF)
844 {
845 error(startline, "unterminated table");
846 }
847 else
848 {
849 error(line, "Unexpected token %s", tokenNames[token]);
850 }
851
852 return NULL;
853 }
854
855 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
856
857 if (U_FAILURE(*status))
858 {
46f4442e 859 res_close(result);
b75a7d8f
A
860 return NULL;
861 }
862
729e4ab9 863 member = parseResource(state, subtag, NULL, status);
b75a7d8f
A
864
865 if (U_FAILURE(*status))
866 {
46f4442e 867 res_close(result);
b75a7d8f
A
868 return NULL;
869 }
57a6839d
A
870 if (result == NULL)
871 {
872 // Ignore the parsed resources, continue parsing.
873 }
2ca993e8 874 else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
b75a7d8f 875 {
2ca993e8 876 StringResource *sr = static_cast<StringResource *>(member);
b75a7d8f 877 char ver[40];
2ca993e8 878 int32_t length = sr->length();
b75a7d8f 879
2ca993e8 880 if (length >= UPRV_LENGTHOF(ver))
b75a7d8f 881 {
2ca993e8 882 length = UPRV_LENGTHOF(ver) - 1;
b75a7d8f
A
883 }
884
2ca993e8 885 sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
b75a7d8f
A
886 u_versionFromString(version, ver);
887
2ca993e8 888 result->add(member, line, *status);
57a6839d 889 member = NULL;
b75a7d8f
A
890 }
891 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
892 {
893 /* discard duplicate %%CollationBin if any*/
894 }
2ca993e8 895 else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
b75a7d8f 896 {
2ca993e8
A
897 StringResource *sr = static_cast<StringResource *>(member);
898 rules = sr->fString;
57a6839d
A
899 haveRules = TRUE;
900 // Defer building the collator until we have seen
901 // all sub-elements of the collation table, including the Version.
729e4ab9
A
902 /* in order to achieve smaller data files, we can direct genrb */
903 /* to omit collation rules */
57a6839d 904 if(!state->omitCollationRules) {
2ca993e8 905 result->add(member, line, *status);
57a6839d 906 member = NULL;
729e4ab9 907 }
b75a7d8f 908 }
57a6839d
A
909 else // Just copy non-special items.
910 {
2ca993e8 911 result->add(member, line, *status);
57a6839d
A
912 member = NULL;
913 }
914 res_close(member); // TODO: use LocalPointer
b75a7d8f
A
915 if (U_FAILURE(*status))
916 {
46f4442e 917 res_close(result);
b75a7d8f
A
918 return NULL;
919 }
920 }
921
57a6839d
A
922 if (!haveRules) { return result; }
923
924#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
925 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
926 (void)collationType;
927#else
b331163b
A
928 // CLDR ticket #3949, ICU ticket #8082:
929 // Do not build collation binary data for for-import-only "private" collation rule strings.
930 if (uprv_strncmp(collationType, "private-", 8) == 0) {
931 if(isVerbose()) {
932 printf("Not building %s~%s collation binary\n", state->filename, collationType);
933 }
934 return result;
935 }
936
57a6839d
A
937 if(!state->makeBinaryCollation) {
938 if(isVerbose()) {
939 printf("Not building %s~%s collation binary\n", state->filename, collationType);
940 }
941 return result;
942 }
943 UErrorCode intStatus = U_ZERO_ERROR;
944 UParseError parseError;
945 uprv_memset(&parseError, 0, sizeof(parseError));
946 GenrbImporter importer(state->inputdir, state->outputdir);
947 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
948 if(U_FAILURE(intStatus)) {
949 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
950 res_close(result);
951 return NULL; // TODO: use LocalUResourceBundlePointer for result
952 }
953 icu::CollationBuilder builder(base, intStatus);
954 if(uprv_strncmp(collationType, "search", 6) == 0) {
955 builder.disableFastLatin(); // build fast-Latin table unless search collator
956 }
957 LocalPointer<icu::CollationTailoring> t(
958 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
959 if(U_FAILURE(intStatus)) {
960 const char *reason = builder.getErrorReason();
961 if(reason == NULL) { reason = ""; }
962 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
963 state->filename, collationType,
964 (long)parseError.offset, u_errorName(intStatus), reason);
965 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
966 // Print pre- and post-context.
967 char preBuffer[100], postBuffer[100];
968 escape(parseError.preContext, preBuffer);
969 escape(parseError.postContext, postBuffer);
970 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
971 }
2ca993e8 972 if(isStrict() || t.isNull()) {
57a6839d
A
973 *status = intStatus;
974 res_close(result);
975 return NULL;
976 }
977 }
978 icu::LocalMemory<uint8_t> buffer;
979 int32_t capacity = 100000;
980 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
981 if(dest == NULL) {
982 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
983 (long)capacity);
984 *status = U_MEMORY_ALLOCATION_ERROR;
985 res_close(result);
986 return NULL;
987 }
988 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
989 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
990 *t, *t->settings, indexes, dest, capacity, intStatus);
991 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
992 intStatus = U_ZERO_ERROR;
993 capacity = totalSize;
994 dest = buffer.allocateInsteadAndCopy(capacity);
995 if(dest == NULL) {
996 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
997 (long)capacity);
998 *status = U_MEMORY_ALLOCATION_ERROR;
999 res_close(result);
1000 return NULL;
1001 }
1002 totalSize = icu::CollationDataWriter::writeTailoring(
1003 *t, *t->settings, indexes, dest, capacity, intStatus);
1004 }
1005 if(U_FAILURE(intStatus)) {
1006 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1007 u_errorName(intStatus));
1008 res_close(result);
1009 return NULL;
1010 }
1011 if(isVerbose()) {
1012 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1013 icu::CollationInfo::printSizes(totalSize, indexes);
b331163b
A
1014 if(t->settings->hasReordering()) {
1015 printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1016 icu::CollationInfo::printReorderRanges(
1017 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1018 }
57a6839d
A
1019 }
1020 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
2ca993e8 1021 result->add(collationBin, line, *status);
57a6839d
A
1022 if (U_FAILURE(*status)) {
1023 res_close(result);
1024 return NULL;
1025 }
1026#endif
1027 return result;
1028}
1029
1030static UBool
b331163b
A
1031keepCollationType(const char * /*type*/) {
1032 return TRUE;
b75a7d8f
A
1033}
1034
374ca955 1035static struct SResource *
729e4ab9 1036parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
374ca955 1037{
2ca993e8 1038 TableResource *result = NULL;
374ca955 1039 struct SResource *member = NULL;
374ca955
A
1040 struct UString *tokenValue;
1041 struct UString comment;
1042 enum ETokenType token;
1043 char subtag[1024], typeKeyword[1024];
1044 uint32_t line;
1045
729e4ab9 1046 result = table_open(state->bundle, tag, NULL, status);
374ca955
A
1047
1048 if (result == NULL || U_FAILURE(*status))
1049 {
1050 return NULL;
1051 }
1052 if(isVerbose()){
1053 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1054 }
1055 if(!newCollation) {
57a6839d 1056 return addCollation(state, result, "(no type)", startline, status);
73c04bcf
A
1057 }
1058 else {
1059 for(;;) {
1060 ustr_init(&comment);
729e4ab9 1061 token = getToken(state, &tokenValue, &comment, &line, status);
374ca955 1062
73c04bcf 1063 if (token == TOK_CLOSE_BRACE)
374ca955 1064 {
73c04bcf 1065 return result;
374ca955 1066 }
73c04bcf
A
1067
1068 if (token != TOK_STRING)
374ca955 1069 {
46f4442e 1070 res_close(result);
73c04bcf
A
1071 *status = U_INVALID_FORMAT_ERROR;
1072
1073 if (token == TOK_EOF)
1074 {
1075 error(startline, "unterminated table");
1076 }
1077 else
1078 {
1079 error(line, "Unexpected token %s", tokenNames[token]);
1080 }
1081
1082 return NULL;
374ca955
A
1083 }
1084
73c04bcf 1085 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
374ca955 1086
73c04bcf
A
1087 if (U_FAILURE(*status))
1088 {
46f4442e 1089 res_close(result);
73c04bcf
A
1090 return NULL;
1091 }
374ca955 1092
73c04bcf
A
1093 if (uprv_strcmp(subtag, "default") == 0)
1094 {
729e4ab9 1095 member = parseResource(state, subtag, NULL, status);
374ca955 1096
73c04bcf
A
1097 if (U_FAILURE(*status))
1098 {
46f4442e 1099 res_close(result);
73c04bcf
A
1100 return NULL;
1101 }
374ca955 1102
2ca993e8 1103 result->add(member, line, *status);
73c04bcf
A
1104 }
1105 else
1106 {
729e4ab9 1107 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
73c04bcf
A
1108 /* this probably needs to be refactored or recursively use the parser */
1109 /* first we assume that our collation table won't have the explicit type */
1110 /* then, we cannot handle aliases */
1111 if(token == TOK_OPEN_BRACE) {
729e4ab9 1112 token = getToken(state, &tokenValue, &comment, &line, status);
2ca993e8 1113 TableResource *collationRes;
57a6839d
A
1114 if (keepCollationType(subtag)) {
1115 collationRes = table_open(state->bundle, subtag, NULL, status);
1116 } else {
1117 collationRes = NULL;
1118 }
1119 // need to parse the collation data regardless
1120 collationRes = addCollation(state, collationRes, subtag, startline, status);
1121 if (collationRes != NULL) {
2ca993e8 1122 result->add(collationRes, startline, *status);
729e4ab9 1123 }
73c04bcf
A
1124 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1125 /* we could have a table too */
729e4ab9 1126 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
73c04bcf
A
1127 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1128 if(uprv_strcmp(typeKeyword, "alias") == 0) {
729e4ab9 1129 member = parseResource(state, subtag, NULL, status);
73c04bcf
A
1130 if (U_FAILURE(*status))
1131 {
46f4442e 1132 res_close(result);
73c04bcf
A
1133 return NULL;
1134 }
374ca955 1135
2ca993e8 1136 result->add(member, line, *status);
73c04bcf 1137 } else {
46f4442e 1138 res_close(result);
73c04bcf
A
1139 *status = U_INVALID_FORMAT_ERROR;
1140 return NULL;
1141 }
1142 } else {
46f4442e 1143 res_close(result);
73c04bcf
A
1144 *status = U_INVALID_FORMAT_ERROR;
1145 return NULL;
1146 }
374ca955 1147 }
374ca955 1148
73c04bcf 1149 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
374ca955 1150
73c04bcf 1151 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
374ca955 1152
73c04bcf
A
1153 if (U_FAILURE(*status))
1154 {
46f4442e 1155 res_close(result);
73c04bcf
A
1156 return NULL;
1157 }
374ca955 1158 }
374ca955
A
1159 }
1160}
1161
b75a7d8f
A
1162/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1163 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1164static struct SResource *
2ca993e8 1165realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
b75a7d8f
A
1166{
1167 struct SResource *member = NULL;
1168 struct UString *tokenValue=NULL;
374ca955 1169 struct UString comment;
b75a7d8f
A
1170 enum ETokenType token;
1171 char subtag[1024];
1172 uint32_t line;
1173 UBool readToken = FALSE;
1174
1175 /* '{' . (name resource)* '}' */
4388f060 1176
b75a7d8f 1177 if(isVerbose()){
374ca955 1178 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1179 }
1180 for (;;)
1181 {
374ca955 1182 ustr_init(&comment);
729e4ab9 1183 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
1184
1185 if (token == TOK_CLOSE_BRACE)
1186 {
1187 if (!readToken) {
1188 warning(startline, "Encountered empty table");
1189 }
1190 return table;
1191 }
1192
1193 if (token != TOK_STRING)
1194 {
b75a7d8f
A
1195 *status = U_INVALID_FORMAT_ERROR;
1196
1197 if (token == TOK_EOF)
1198 {
1199 error(startline, "unterminated table");
1200 }
1201 else
1202 {
374ca955 1203 error(line, "unexpected token %s", tokenNames[token]);
b75a7d8f
A
1204 }
1205
1206 return NULL;
1207 }
1208
374ca955
A
1209 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1210 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1211 } else {
1212 *status = U_INVALID_FORMAT_ERROR;
1213 error(line, "invariant characters required for table keys");
374ca955
A
1214 return NULL;
1215 }
b75a7d8f
A
1216
1217 if (U_FAILURE(*status))
1218 {
729e4ab9 1219 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
b75a7d8f
A
1220 return NULL;
1221 }
1222
729e4ab9 1223 member = parseResource(state, subtag, &comment, status);
b75a7d8f
A
1224
1225 if (member == NULL || U_FAILURE(*status))
1226 {
729e4ab9 1227 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
b75a7d8f
A
1228 return NULL;
1229 }
1230
2ca993e8 1231 table->add(member, line, *status);
b75a7d8f
A
1232
1233 if (U_FAILURE(*status))
1234 {
729e4ab9 1235 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
b75a7d8f
A
1236 return NULL;
1237 }
1238 readToken = TRUE;
46f4442e 1239 ustr_deinit(&comment);
4388f060 1240 }
b75a7d8f
A
1241
1242 /* not reached */
1243 /* A compiler warning will appear if all paths don't contain a return statement. */
1244/* *status = U_INTERNAL_PROGRAM_ERROR;
1245 return NULL;*/
1246}
1247
1248static struct SResource *
729e4ab9 1249parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1250{
b75a7d8f
A
1251 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1252 {
729e4ab9 1253 return parseCollationElements(state, tag, startline, FALSE, status);
374ca955
A
1254 }
1255 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1256 {
729e4ab9 1257 return parseCollationElements(state, tag, startline, TRUE, status);
b75a7d8f
A
1258 }
1259 if(isVerbose()){
374ca955 1260 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1261 }
374ca955 1262
2ca993e8 1263 TableResource *result = table_open(state->bundle, tag, comment, status);
b75a7d8f
A
1264
1265 if (result == NULL || U_FAILURE(*status))
1266 {
1267 return NULL;
1268 }
729e4ab9 1269 return realParseTable(state, result, tag, startline, status);
b75a7d8f
A
1270}
1271
1272static struct SResource *
729e4ab9 1273parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1274{
b75a7d8f
A
1275 struct SResource *member = NULL;
1276 struct UString *tokenValue;
374ca955 1277 struct UString memberComments;
b75a7d8f
A
1278 enum ETokenType token;
1279 UBool readToken = FALSE;
1280
2ca993e8 1281 ArrayResource *result = array_open(state->bundle, tag, comment, status);
b75a7d8f
A
1282
1283 if (result == NULL || U_FAILURE(*status))
1284 {
1285 return NULL;
1286 }
1287 if(isVerbose()){
374ca955 1288 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1289 }
374ca955
A
1290
1291 ustr_init(&memberComments);
1292
b75a7d8f
A
1293 /* '{' . resource [','] '}' */
1294 for (;;)
1295 {
374ca955
A
1296 /* reset length */
1297 ustr_setlen(&memberComments, 0, status);
1298
b75a7d8f 1299 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1300 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
374ca955 1301
b75a7d8f
A
1302
1303 if (token == TOK_CLOSE_BRACE)
1304 {
729e4ab9 1305 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1306 if (!readToken) {
1307 warning(startline, "Encountered empty array");
1308 }
1309 break;
1310 }
1311
1312 if (token == TOK_EOF)
1313 {
46f4442e 1314 res_close(result);
b75a7d8f
A
1315 *status = U_INVALID_FORMAT_ERROR;
1316 error(startline, "unterminated array");
1317 return NULL;
1318 }
1319
1320 /* string arrays are a special case */
1321 if (token == TOK_STRING)
1322 {
729e4ab9
A
1323 getToken(state, &tokenValue, &memberComments, NULL, status);
1324 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
b75a7d8f
A
1325 }
1326 else
1327 {
729e4ab9 1328 member = parseResource(state, NULL, &memberComments, status);
b75a7d8f
A
1329 }
1330
1331 if (member == NULL || U_FAILURE(*status))
1332 {
46f4442e 1333 res_close(result);
b75a7d8f
A
1334 return NULL;
1335 }
1336
2ca993e8 1337 result->add(member);
b75a7d8f
A
1338
1339 /* eat optional comma if present */
729e4ab9 1340 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1341
1342 if (token == TOK_COMMA)
1343 {
729e4ab9 1344 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1345 }
1346
1347 if (U_FAILURE(*status))
1348 {
46f4442e 1349 res_close(result);
b75a7d8f
A
1350 return NULL;
1351 }
1352 readToken = TRUE;
1353 }
1354
46f4442e 1355 ustr_deinit(&memberComments);
b75a7d8f
A
1356 return result;
1357}
1358
1359static struct SResource *
729e4ab9 1360parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1361{
b75a7d8f
A
1362 enum ETokenType token;
1363 char *string;
1364 int32_t value;
1365 UBool readToken = FALSE;
b75a7d8f
A
1366 char *stopstring;
1367 uint32_t len;
374ca955 1368 struct UString memberComments;
b75a7d8f 1369
2ca993e8 1370 IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
b75a7d8f
A
1371
1372 if (result == NULL || U_FAILURE(*status))
1373 {
1374 return NULL;
1375 }
1376
1377 if(isVerbose()){
374ca955 1378 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1379 }
374ca955 1380 ustr_init(&memberComments);
b75a7d8f
A
1381 /* '{' . string [','] '}' */
1382 for (;;)
1383 {
374ca955
A
1384 ustr_setlen(&memberComments, 0, status);
1385
b75a7d8f 1386 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1387 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
b75a7d8f
A
1388
1389 if (token == TOK_CLOSE_BRACE)
1390 {
1391 /* it's the end, consume the close brace */
729e4ab9 1392 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1393 if (!readToken) {
1394 warning(startline, "Encountered empty int vector");
1395 }
46f4442e 1396 ustr_deinit(&memberComments);
b75a7d8f
A
1397 return result;
1398 }
1399
729e4ab9 1400 string = getInvariantString(state, NULL, NULL, status);
b75a7d8f
A
1401
1402 if (U_FAILURE(*status))
1403 {
46f4442e 1404 res_close(result);
b75a7d8f
A
1405 return NULL;
1406 }
b75a7d8f 1407
46f4442e 1408 /* For handling illegal char in the Intvector */
b75a7d8f 1409 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
374ca955 1410 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1411
1412 if(len==uprv_strlen(string))
1413 {
2ca993e8 1414 result->add(value, *status);
b75a7d8f 1415 uprv_free(string);
729e4ab9 1416 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1417 }
1418 else
1419 {
1420 uprv_free(string);
1421 *status=U_INVALID_CHAR_FOUND;
1422 }
b75a7d8f
A
1423
1424 if (U_FAILURE(*status))
1425 {
46f4442e 1426 res_close(result);
b75a7d8f
A
1427 return NULL;
1428 }
1429
1430 /* the comma is optional (even though it is required to prevent the reader from concatenating
1431 consecutive entries) so that a missing comma on the last entry isn't an error */
1432 if (token == TOK_COMMA)
1433 {
729e4ab9 1434 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1435 }
1436 readToken = TRUE;
1437 }
1438
1439 /* not reached */
1440 /* A compiler warning will appear if all paths don't contain a return statement. */
1441/* intvector_close(result, status);
1442 *status = U_INTERNAL_PROGRAM_ERROR;
1443 return NULL;*/
1444}
1445
1446static struct SResource *
729e4ab9 1447parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1448{
2ca993e8
A
1449 uint32_t line;
1450 LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
1451 if (string.isNull() || U_FAILURE(*status))
b75a7d8f
A
1452 {
1453 return NULL;
1454 }
1455
729e4ab9 1456 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1457 if (U_FAILURE(*status))
1458 {
b75a7d8f
A
1459 return NULL;
1460 }
1461
1462 if(isVerbose()){
374ca955 1463 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1464 }
1465
2ca993e8 1466 uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
b75a7d8f
A
1467 if (count > 0){
1468 if((count % 2)==0){
2ca993e8
A
1469 LocalMemory<uint8_t> value;
1470 if (value.allocateInsteadAndCopy(count) == NULL)
b75a7d8f 1471 {
b75a7d8f
A
1472 *status = U_MEMORY_ALLOCATION_ERROR;
1473 return NULL;
1474 }
1475
2ca993e8
A
1476 char toConv[3] = {'\0', '\0', '\0'};
1477 for (uint32_t i = 0; i < count; i += 2)
b75a7d8f
A
1478 {
1479 toConv[0] = string[i];
1480 toConv[1] = string[i + 1];
1481
2ca993e8 1482 char *stopstring;
b75a7d8f 1483 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
2ca993e8 1484 uint32_t len=(uint32_t)(stopstring-toConv);
b75a7d8f 1485
2ca993e8 1486 if(len!=2)
b75a7d8f 1487 {
b75a7d8f
A
1488 *status=U_INVALID_CHAR_FOUND;
1489 return NULL;
1490 }
1491 }
1492
2ca993e8 1493 return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
b75a7d8f
A
1494 }
1495 else
1496 {
1497 *status = U_INVALID_CHAR_FOUND;
2ca993e8 1498 error(line, "Encountered invalid binary value (length is odd)");
b75a7d8f
A
1499 return NULL;
1500 }
1501 }
1502 else
1503 {
2ca993e8
A
1504 warning(startline, "Encountered empty binary value");
1505 return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
b75a7d8f 1506 }
b75a7d8f
A
1507}
1508
1509static struct SResource *
729e4ab9 1510parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1511{
1512 struct SResource *result = NULL;
1513 int32_t value;
1514 char *string;
b75a7d8f
A
1515 char *stopstring;
1516 uint32_t len;
1517
729e4ab9 1518 string = getInvariantString(state, NULL, NULL, status);
b75a7d8f
A
1519
1520 if (string == NULL || U_FAILURE(*status))
1521 {
1522 return NULL;
1523 }
1524
729e4ab9 1525 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1526
1527 if (U_FAILURE(*status))
1528 {
1529 uprv_free(string);
1530 return NULL;
1531 }
1532
1533 if(isVerbose()){
374ca955 1534 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1535 }
1536
1537 if (uprv_strlen(string) <= 0)
1538 {
1539 warning(startline, "Encountered empty integer. Default value is 0.");
1540 }
1541
46f4442e
A
1542 /* Allow integer support for hexdecimal, octal digit and decimal*/
1543 /* and handle illegal char in the integer*/
b75a7d8f 1544 value = uprv_strtoul(string, &stopstring, 0);
374ca955 1545 len=(uint32_t)(stopstring-string);
b75a7d8f
A
1546 if(len==uprv_strlen(string))
1547 {
729e4ab9 1548 result = int_open(state->bundle, tag, value, comment, status);
b75a7d8f
A
1549 }
1550 else
1551 {
1552 *status=U_INVALID_CHAR_FOUND;
1553 }
1554 uprv_free(string);
1555
1556 return result;
1557}
1558
1559static struct SResource *
729e4ab9 1560parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f 1561{
b75a7d8f 1562 uint32_t line;
2ca993e8 1563 LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
b75a7d8f
A
1564 if (U_FAILURE(*status))
1565 {
1566 return NULL;
1567 }
1568
729e4ab9 1569 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1570
1571 if (U_FAILURE(*status))
1572 {
b75a7d8f
A
1573 return NULL;
1574 }
1575
1576 if(isVerbose()){
374ca955 1577 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1578 }
1579
1580 /* Open the input file for reading */
2ca993e8
A
1581 CharString fullname;
1582 if (state->inputdir != NULL) {
1583 fullname.append(state->inputdir, *status);
b75a7d8f 1584 }
2ca993e8
A
1585 fullname.appendPathPart(filename.getAlias(), *status);
1586 if (U_FAILURE(*status)) {
1587 return NULL;
b75a7d8f
A
1588 }
1589
2ca993e8 1590 FileStream *file = T_FileStream_open(fullname.data(), "rb");
b75a7d8f
A
1591 if (file == NULL)
1592 {
2ca993e8 1593 error(line, "couldn't open input file %s", filename.getAlias());
b75a7d8f
A
1594 *status = U_FILE_ACCESS_ERROR;
1595 return NULL;
1596 }
1597
2ca993e8
A
1598 int32_t len = T_FileStream_size(file);
1599 LocalMemory<uint8_t> data;
1600 if(data.allocateInsteadAndCopy(len) == NULL)
b75a7d8f
A
1601 {
1602 *status = U_MEMORY_ALLOCATION_ERROR;
1603 T_FileStream_close (file);
1604 return NULL;
1605 }
1606
2ca993e8 1607 /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
b75a7d8f
A
1608 T_FileStream_close (file);
1609
2ca993e8 1610 return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
b75a7d8f
A
1611}
1612
1613static struct SResource *
729e4ab9 1614parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1615{
1616 struct SResource *result;
1617 int32_t len=0;
1618 char *filename;
1619 uint32_t line;
1620 UChar *pTarget = NULL;
1621
1622 UCHARBUF *ucbuf;
1623 char *fullname = NULL;
1624 int32_t count = 0;
1625 const char* cp = NULL;
1626 const UChar* uBuffer = NULL;
1627
729e4ab9 1628 filename = getInvariantString(state, &line, NULL, status);
374ca955 1629 count = (int32_t)uprv_strlen(filename);
b75a7d8f
A
1630
1631 if (U_FAILURE(*status))
1632 {
1633 return NULL;
1634 }
1635
729e4ab9 1636 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1637
1638 if (U_FAILURE(*status))
1639 {
1640 uprv_free(filename);
1641 return NULL;
1642 }
1643
1644 if(isVerbose()){
374ca955 1645 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1646 }
1647
729e4ab9 1648 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
b75a7d8f
A
1649 /* test for NULL */
1650 if(fullname == NULL)
1651 {
1652 *status = U_MEMORY_ALLOCATION_ERROR;
1653 uprv_free(filename);
1654 return NULL;
374ca955 1655 }
b75a7d8f 1656
729e4ab9
A
1657 if(state->inputdir!=NULL){
1658 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
1659 {
1660
729e4ab9 1661 uprv_strcpy(fullname, state->inputdir);
b75a7d8f 1662
729e4ab9
A
1663 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1664 fullname[state->inputdirLength + 1] = '\0';
b75a7d8f
A
1665
1666 uprv_strcat(fullname, filename);
1667 }
1668 else
1669 {
729e4ab9 1670 uprv_strcpy(fullname, state->inputdir);
b75a7d8f
A
1671 uprv_strcat(fullname, filename);
1672 }
1673 }else{
1674 uprv_strcpy(fullname,filename);
1675 }
1676
1677 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1678
1679 if (U_FAILURE(*status)) {
1680 error(line, "couldn't open input file %s\n", filename);
1681 return NULL;
1682 }
1683
1684 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
729e4ab9 1685 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
b75a7d8f 1686
51004dcb
A
1687 ucbuf_close(ucbuf);
1688
b75a7d8f
A
1689 uprv_free(pTarget);
1690
1691 uprv_free(filename);
1692 uprv_free(fullname);
1693
1694 return result;
1695}
1696
73c04bcf
A
1697
1698
1699
1700
1701U_STRING_DECL(k_type_string, "string", 6);
1702U_STRING_DECL(k_type_binary, "binary", 6);
1703U_STRING_DECL(k_type_bin, "bin", 3);
1704U_STRING_DECL(k_type_table, "table", 5);
1705U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1706U_STRING_DECL(k_type_int, "int", 3);
1707U_STRING_DECL(k_type_integer, "integer", 7);
1708U_STRING_DECL(k_type_array, "array", 5);
1709U_STRING_DECL(k_type_alias, "alias", 5);
1710U_STRING_DECL(k_type_intvector, "intvector", 9);
1711U_STRING_DECL(k_type_import, "import", 6);
1712U_STRING_DECL(k_type_include, "include", 7);
73c04bcf
A
1713
1714/* Various non-standard processing plugins that create one or more special resources. */
1715U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1716U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1717U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1718U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1719
/**
 * Resource types understood by the parser.
 *
 * The numeric values are used as indexes into gResourceTypes[], so the
 * order here must stay in step with the rows of that table.
 * RESTYPE_UNKNOWN (first) and RESTYPE_RESERVED (last) bracket the real
 * types and serve as loop bounds when looking a type name up.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,                 /* no explicit type given / not yet determined */

    /* ordinary resource types */
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,

    /* "process(...)" plugin types */
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,

    RESTYPE_RESERVED                 /* end marker, not a real type */
} EResourceType;
1739
1740static struct {
1741 const char *nameChars; /* only used for debugging */
1742 const UChar *nameUChars;
1743 ParseResourceFunction *parseFunction;
1744} gResourceTypes[] = {
1745 {"Unknown", NULL, NULL},
1746 {"string", k_type_string, parseString},
1747 {"binary", k_type_binary, parseBinary},
1748 {"table", k_type_table, parseTable},
1749 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1750 {"integer", k_type_integer, parseInteger},
1751 {"array", k_type_array, parseArray},
1752 {"alias", k_type_alias, parseAlias},
1753 {"intvector", k_type_intvector, parseIntVector},
1754 {"import", k_type_import, parseImport},
1755 {"include", k_type_include, parseInclude},
1756 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1757 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1758 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1759 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1760 {"reserved", NULL, NULL}
1761};
1762
57a6839d 1763void initParser()
73c04bcf 1764{
73c04bcf
A
1765 U_STRING_INIT(k_type_string, "string", 6);
1766 U_STRING_INIT(k_type_binary, "binary", 6);
1767 U_STRING_INIT(k_type_bin, "bin", 3);
1768 U_STRING_INIT(k_type_table, "table", 5);
1769 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1770 U_STRING_INIT(k_type_int, "int", 3);
1771 U_STRING_INIT(k_type_integer, "integer", 7);
1772 U_STRING_INIT(k_type_array, "array", 5);
1773 U_STRING_INIT(k_type_alias, "alias", 5);
1774 U_STRING_INIT(k_type_intvector, "intvector", 9);
1775 U_STRING_INIT(k_type_import, "import", 6);
73c04bcf
A
1776 U_STRING_INIT(k_type_include, "include", 7);
1777
1778 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1779 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1780 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1781 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
73c04bcf
A
1782}
1783
4388f060 1784static inline UBool isTable(enum EResourceType type) {
57a6839d 1785 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
73c04bcf
A
1786}
1787
1788static enum EResourceType
729e4ab9 1789parseResourceType(ParseState* state, UErrorCode *status)
73c04bcf
A
1790{
1791 struct UString *tokenValue;
1792 struct UString comment;
57a6839d 1793 enum EResourceType result = RESTYPE_UNKNOWN;
73c04bcf
A
1794 uint32_t line=0;
1795 ustr_init(&comment);
729e4ab9 1796 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
73c04bcf
A
1797
1798 if (U_FAILURE(*status))
1799 {
57a6839d 1800 return RESTYPE_UNKNOWN;
73c04bcf
A
1801 }
1802
1803 *status = U_ZERO_ERROR;
1804
1805 /* Search for normal types */
57a6839d
A
1806 result=RESTYPE_UNKNOWN;
1807 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
73c04bcf
A
1808 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1809 break;
1810 }
1811 }
1812 /* Now search for the aliases */
1813 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
57a6839d 1814 result = RESTYPE_INTEGER;
73c04bcf
A
1815 }
1816 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
57a6839d 1817 result = RESTYPE_BINARY;
73c04bcf 1818 }
57a6839d 1819 else if (result == RESTYPE_RESERVED) {
73c04bcf
A
1820 char tokenBuffer[1024];
1821 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1822 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1823 *status = U_INVALID_FORMAT_ERROR;
1824 error(line, "unknown resource type '%s'", tokenBuffer);
1825 }
1826
1827 return result;
1828}
1829
1830/* parse a non-top-level resource */
b75a7d8f 1831static struct SResource *
729e4ab9 1832parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1833{
1834 enum ETokenType token;
57a6839d 1835 enum EResourceType resType = RESTYPE_UNKNOWN;
73c04bcf 1836 ParseResourceFunction *parseFunction = NULL;
b75a7d8f
A
1837 struct UString *tokenValue;
1838 uint32_t startline;
1839 uint32_t line;
1840
4388f060 1841
729e4ab9 1842 token = getToken(state, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1843
1844 if(isVerbose()){
374ca955 1845 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1846 }
1847
1848 /* name . [ ':' type ] '{' resource '}' */
1849 /* This function parses from the colon onwards. If the colon is present, parse the
1850 type then try to parse a resource of that type. If there is no explicit type,
1851 work it out using the lookahead tokens. */
1852 switch (token)
1853 {
1854 case TOK_EOF:
1855 *status = U_INVALID_FORMAT_ERROR;
1856 error(startline, "Unexpected EOF encountered");
1857 return NULL;
1858
1859 case TOK_ERROR:
1860 *status = U_INVALID_FORMAT_ERROR;
1861 return NULL;
1862
1863 case TOK_COLON:
729e4ab9
A
1864 resType = parseResourceType(state, status);
1865 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1866
1867 if (U_FAILURE(*status))
1868 {
1869 return NULL;
1870 }
1871
1872 break;
1873
1874 case TOK_OPEN_BRACE:
1875 break;
1876
1877 default:
1878 *status = U_INVALID_FORMAT_ERROR;
1879 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1880 return NULL;
1881 }
1882
4388f060 1883
57a6839d 1884 if (resType == RESTYPE_UNKNOWN)
b75a7d8f
A
1885 {
1886 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1887 We could have any of the following:
1888 { { => array (nested)
1889 { :/} => array
1890 { string , => string array
1891
b75a7d8f
A
1892 { string { => table
1893
b75a7d8f
A
1894 { string :/{ => table
1895 { string } => string
1896 */
1897
729e4ab9 1898 token = peekToken(state, 0, NULL, &line, NULL,status);
b75a7d8f
A
1899
1900 if (U_FAILURE(*status))
1901 {
1902 return NULL;
1903 }
1904
b75a7d8f
A
1905 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1906 {
57a6839d 1907 resType = RESTYPE_ARRAY;
b75a7d8f
A
1908 }
1909 else if (token == TOK_STRING)
1910 {
729e4ab9 1911 token = peekToken(state, 1, NULL, &line, NULL, status);
b75a7d8f
A
1912
1913 if (U_FAILURE(*status))
1914 {
1915 return NULL;
1916 }
1917
1918 switch (token)
1919 {
57a6839d
A
1920 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
1921 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
1922 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
1923 case TOK_COLON: resType = RESTYPE_TABLE; break;
b75a7d8f
A
1924 default:
1925 *status = U_INVALID_FORMAT_ERROR;
1926 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1927 return NULL;
1928 }
1929 }
1930 else
1931 {
1932 *status = U_INVALID_FORMAT_ERROR;
1933 error(line, "Unexpected token after '{'");
1934 return NULL;
1935 }
1936
1937 /* printf("Type guessed as %s\n", resourceNames[resType]); */
57a6839d 1938 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
73c04bcf
A
1939 *status = U_INVALID_FORMAT_ERROR;
1940 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1941 return NULL;
b75a7d8f
A
1942 }
1943
4388f060 1944
b75a7d8f
A
1945 /* We should now know what we need to parse next, so call the appropriate parser
1946 function and return. */
73c04bcf
A
1947 parseFunction = gResourceTypes[resType].parseFunction;
1948 if (parseFunction != NULL) {
729e4ab9 1949 return parseFunction(state, tag, startline, comment, status);
73c04bcf
A
1950 }
1951 else {
b75a7d8f 1952 *status = U_INTERNAL_PROGRAM_ERROR;
73c04bcf 1953 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
b75a7d8f
A
1954 }
1955
1956 return NULL;
1957}
1958
73c04bcf 1959/* parse the top-level resource */
b75a7d8f 1960struct SRBRoot *
57a6839d
A
1961parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
1962 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
b75a7d8f
A
1963{
1964 struct UString *tokenValue;
374ca955 1965 struct UString comment;
b75a7d8f 1966 uint32_t line;
b75a7d8f
A
1967 enum EResourceType bundleType;
1968 enum ETokenType token;
729e4ab9
A
1969 ParseState state;
1970 uint32_t i;
4388f060
A
1971
1972
729e4ab9
A
1973 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1974 {
1975 ustr_init(&state.lookahead[i].value);
1976 ustr_init(&state.lookahead[i].comment);
1977 }
b75a7d8f 1978
729e4ab9 1979 initLookahead(&state, buf, status);
b75a7d8f 1980
729e4ab9
A
1981 state.inputdir = inputDir;
1982 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
1983 state.outputdir = outputDir;
1984 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
57a6839d 1985 state.filename = filename;
4388f060 1986 state.makeBinaryCollation = makeBinaryCollation;
57a6839d 1987 state.omitCollationRules = omitCollationRules;
374ca955
A
1988
1989 ustr_init(&comment);
729e4ab9 1990 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
b75a7d8f 1991
2ca993e8 1992 state.bundle = new SRBRoot(&comment, FALSE, *status);
b75a7d8f 1993
729e4ab9 1994 if (state.bundle == NULL || U_FAILURE(*status))
b75a7d8f
A
1995 {
1996 return NULL;
1997 }
1998
374ca955 1999
2ca993e8 2000 state.bundle->setLocale(tokenValue->fChars, *status);
729e4ab9 2001
b75a7d8f 2002 /* The following code is to make Empty bundle work no matter with :table specifer or not */
729e4ab9 2003 token = getToken(&state, NULL, NULL, &line, status);
73c04bcf 2004 if(token==TOK_COLON) {
b75a7d8f 2005 *status=U_ZERO_ERROR;
729e4ab9 2006 bundleType=parseResourceType(&state, status);
b75a7d8f 2007
73c04bcf 2008 if(isTable(bundleType))
b75a7d8f 2009 {
729e4ab9 2010 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
b75a7d8f
A
2011 }
2012 else
2013 {
2014 *status=U_PARSE_ERROR;
4388f060 2015 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
b75a7d8f
A
2016 }
2017 }
2018 else
2019 {
73c04bcf 2020 /* not a colon */
b75a7d8f
A
2021 if(token==TOK_OPEN_BRACE)
2022 {
2023 *status=U_ZERO_ERROR;
57a6839d 2024 bundleType=RESTYPE_TABLE;
b75a7d8f
A
2025 }
2026 else
2027 {
73c04bcf
A
2028 /* neither colon nor open brace */
2029 *status=U_PARSE_ERROR;
57a6839d 2030 bundleType=RESTYPE_UNKNOWN;
b75a7d8f
A
2031 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2032 }
2033 }
b75a7d8f
A
2034
2035 if (U_FAILURE(*status))
2036 {
2ca993e8 2037 delete state.bundle;
b75a7d8f
A
2038 return NULL;
2039 }
2040
57a6839d 2041 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
73c04bcf
A
2042 /*
2043 * Parse a top-level table with the table(nofallback) declaration.
2044 * This is the same as a regular table, but also sets the
2045 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2046 */
2ca993e8 2047 state.bundle->fNoFallback=TRUE;
73c04bcf
A
2048 }
2049 /* top-level tables need not handle special table names like "collations" */
2ca993e8
A
2050 assert(!state.bundle->fIsPoolBundle);
2051 assert(state.bundle->fRoot->fType == URES_TABLE);
2052 TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
2053 realParseTable(&state, rootTable, NULL, line, status);
73c04bcf 2054 if(dependencyArray!=NULL){
2ca993e8 2055 rootTable->add(dependencyArray, 0, *status);
73c04bcf
A
2056 dependencyArray = NULL;
2057 }
4388f060 2058 if (U_FAILURE(*status))
b75a7d8f 2059 {
2ca993e8 2060 delete state.bundle;
46f4442e 2061 res_close(dependencyArray);
b75a7d8f
A
2062 return NULL;
2063 }
2064
729e4ab9 2065 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
b75a7d8f
A
2066 {
2067 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2068 if(isStrict()){
2069 *status = U_INVALID_FORMAT_ERROR;
2070 return NULL;
2071 }
2072 }
2073
729e4ab9 2074 cleanupLookahead(&state);
46f4442e 2075 ustr_deinit(&comment);
729e4ab9 2076 return state.bundle;
b75a7d8f 2077}