]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/parse.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / tools / genrb / parse.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4*******************************************************************************
5*
b331163b 6* Copyright (C) 1998-2015, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10*
4388f060 11* File parse.cpp
b75a7d8f
A
12*
13* Modification History:
14*
15* Date Name Description
16* 05/26/99 stephen Creation.
17* 02/25/00 weiv Overhaul to write udata
18* 5/10/01 Ram removed ustdio dependency
19* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
20*******************************************************************************
21*/
22
57a6839d
A
23// Safer use of UnicodeString.
24#ifndef UNISTR_FROM_CHAR_EXPLICIT
25# define UNISTR_FROM_CHAR_EXPLICIT explicit
26#endif
27
28// Less important, but still a good idea.
29#ifndef UNISTR_FROM_STRING_EXPLICIT
30# define UNISTR_FROM_STRING_EXPLICIT explicit
31#endif
32
2ca993e8 33#include <assert.h>
b75a7d8f
A
34#include "parse.h"
35#include "errmsg.h"
36#include "uhash.h"
37#include "cmemory.h"
38#include "cstring.h"
374ca955 39#include "uinvchar.h"
b75a7d8f
A
40#include "read.h"
41#include "ustr.h"
42#include "reslist.h"
73c04bcf 43#include "rbt_pars.h"
729e4ab9 44#include "genrb.h"
2ca993e8
A
45#include "unicode/stringpiece.h"
46#include "unicode/unistr.h"
b75a7d8f 47#include "unicode/ustring.h"
729e4ab9 48#include "unicode/uscript.h"
57a6839d 49#include "unicode/utf16.h"
b75a7d8f 50#include "unicode/putil.h"
2ca993e8 51#include "charstr.h"
57a6839d
A
52#include "collationbuilder.h"
53#include "collationdata.h"
54#include "collationdatareader.h"
55#include "collationdatawriter.h"
56#include "collationfastlatinbuilder.h"
57#include "collationinfo.h"
58#include "collationroot.h"
59#include "collationruleparser.h"
60#include "collationtailoring.h"
73c04bcf 61#include <stdio.h>
b75a7d8f
A
62
63/* Number of tokens to read ahead of the current stream position */
374ca955 64#define MAX_LOOKAHEAD 3
b75a7d8f 65
b75a7d8f
A
66#define CR 0x000D
67#define LF 0x000A
68#define SPACE 0x0020
374ca955 69#define TAB 0x0009
b75a7d8f
A
70#define ESCAPE 0x005C
71#define HASH 0x0023
72#define QUOTE 0x0027
73c04bcf 73#define ZERO 0x0030
b75a7d8f
A
74#define STARTCOMMAND 0x005B
75#define ENDCOMMAND 0x005D
73c04bcf
A
76#define OPENSQBRACKET 0x005B
77#define CLOSESQBRACKET 0x005D
b75a7d8f 78
2ca993e8
A
79using icu::CharString;
80using icu::LocalMemory;
57a6839d 81using icu::LocalPointer;
2ca993e8
A
82using icu::LocalUCHARBUFPointer;
83using icu::StringPiece;
57a6839d
A
84using icu::UnicodeString;
85
b75a7d8f
A
86struct Lookahead
87{
88 enum ETokenType type;
374ca955
A
89 struct UString value;
90 struct UString comment;
91 uint32_t line;
b75a7d8f
A
92};
93
94/* keep in sync with token defines in read.h */
374ca955 95const char *tokenNames[TOK_TOKEN_COUNT] =
b75a7d8f
A
96{
97 "string", /* A string token, such as "MonthNames" */
98 "'{'", /* An opening brace character */
99 "'}'", /* A closing brace character */
100 "','", /* A comma */
101 "':'", /* A colon */
102
103 "<end of file>", /* End of the file has been reached successfully */
374ca955 104 "<end of line>"
b75a7d8f
A
105};
106
107/* Just to store "TRUE" */
51004dcb 108//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
b75a7d8f 109
729e4ab9
A
110typedef struct {
111 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
112 uint32_t lookaheadPosition;
113 UCHARBUF *buffer;
114 struct SRBRoot *bundle;
115 const char *inputdir;
116 uint32_t inputdirLength;
117 const char *outputdir;
118 uint32_t outputdirLength;
57a6839d 119 const char *filename;
4388f060 120 UBool makeBinaryCollation;
57a6839d 121 UBool omitCollationRules;
729e4ab9 122} ParseState;
b75a7d8f 123
729e4ab9
A
124typedef struct SResource *
125ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
126
127static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
b75a7d8f 128
b75a7d8f
A
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and
   the slot handed out by the previous call is refilled with the next token from
   the reader by calling getNextToken.
   The token values are stored in the slots, which means that a token value does
   not survive the next call to getToken, i.e.

      struct UString *value;

      getToken(state, &value, NULL, NULL, status);
      getToken(state, NULL, NULL, NULL, status);   bad - value is now a different string
*/
142static void
729e4ab9 143initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
b75a7d8f
A
144{
145 static uint32_t initTypeStrings = 0;
146 uint32_t i;
147
148 if (!initTypeStrings)
149 {
150 initTypeStrings = 1;
151 }
152
729e4ab9
A
153 state->lookaheadPosition = 0;
154 state->buffer = buf;
b75a7d8f
A
155
156 resetLineNumber();
157
158 for (i = 0; i < MAX_LOOKAHEAD; i++)
159 {
729e4ab9 160 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
161 if (U_FAILURE(*status))
162 {
163 return;
164 }
165 }
166
167 *status = U_ZERO_ERROR;
168}
169
46f4442e 170static void
729e4ab9 171cleanupLookahead(ParseState* state)
46f4442e
A
172{
173 uint32_t i;
51004dcb 174 for (i = 0; i <= MAX_LOOKAHEAD; i++)
46f4442e 175 {
729e4ab9
A
176 ustr_deinit(&state->lookahead[i].value);
177 ustr_deinit(&state->lookahead[i].comment);
46f4442e
A
178 }
179
180}
181
b75a7d8f 182static enum ETokenType
729e4ab9 183getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
184{
185 enum ETokenType result;
186 uint32_t i;
187
729e4ab9 188 result = state->lookahead[state->lookaheadPosition].type;
b75a7d8f
A
189
190 if (tokenValue != NULL)
191 {
729e4ab9 192 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
b75a7d8f
A
193 }
194
195 if (linenumber != NULL)
196 {
729e4ab9 197 *linenumber = state->lookahead[state->lookaheadPosition].line;
b75a7d8f
A
198 }
199
374ca955
A
200 if (comment != NULL)
201 {
729e4ab9 202 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
203 }
204
729e4ab9
A
205 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
206 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
207 ustr_setlen(&state->lookahead[i].comment, 0, status);
208 ustr_setlen(&state->lookahead[i].value, 0, status);
209 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
b75a7d8f
A
210
211 /* printf("getToken, returning %s\n", tokenNames[result]); */
212
213 return result;
214}
215
216static enum ETokenType
729e4ab9 217peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
b75a7d8f 218{
729e4ab9 219 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
b75a7d8f
A
220
221 if (U_FAILURE(*status))
222 {
223 return TOK_ERROR;
224 }
225
226 if (lookaheadCount >= MAX_LOOKAHEAD)
227 {
228 *status = U_INTERNAL_PROGRAM_ERROR;
229 return TOK_ERROR;
230 }
231
232 if (tokenValue != NULL)
233 {
729e4ab9 234 *tokenValue = &state->lookahead[i].value;
b75a7d8f
A
235 }
236
237 if (linenumber != NULL)
238 {
729e4ab9 239 *linenumber = state->lookahead[i].line;
b75a7d8f
A
240 }
241
374ca955 242 if(comment != NULL){
729e4ab9 243 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
374ca955
A
244 }
245
729e4ab9 246 return state->lookahead[i].type;
b75a7d8f
A
247}
248
249static void
729e4ab9 250expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
b75a7d8f
A
251{
252 uint32_t line;
374ca955 253
729e4ab9 254 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
b75a7d8f 255
73c04bcf 256 if (linenumber != NULL)
b75a7d8f 257 {
73c04bcf 258 *linenumber = line;
b75a7d8f
A
259 }
260
73c04bcf 261 if (U_FAILURE(*status))
b75a7d8f 262 {
73c04bcf 263 return;
b75a7d8f
A
264 }
265
266 if (token != expectedToken)
267 {
268 *status = U_INVALID_FORMAT_ERROR;
269 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
270 }
46f4442e 271 else
b75a7d8f
A
272 {
273 *status = U_ZERO_ERROR;
274 }
275}
276
340931cb
A
277static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
278 int32_t &stringLength, UErrorCode *status)
b75a7d8f
A
279{
280 struct UString *tokenValue;
281 char *result;
b75a7d8f 282
729e4ab9 283 expect(state, TOK_STRING, &tokenValue, comment, line, status);
b75a7d8f
A
284
285 if (U_FAILURE(*status))
286 {
287 return NULL;
288 }
289
340931cb 290 if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
374ca955
A
291 *status = U_INVALID_FORMAT_ERROR;
292 error(*line, "invariant characters required for table keys, binary data, etc.");
293 return NULL;
294 }
295
340931cb 296 result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
b75a7d8f
A
297
298 if (result == NULL)
299 {
300 *status = U_MEMORY_ALLOCATION_ERROR;
301 return NULL;
302 }
303
340931cb
A
304 u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
305 stringLength = tokenValue->fLength;
b75a7d8f
A
306 return result;
307}
308
b75a7d8f 309static struct SResource *
4388f060 310parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
b75a7d8f
A
311{
312 struct SResource *result = NULL;
313 struct UString *tokenValue;
314 FileStream *file = NULL;
315 char filename[256] = { '\0' };
316 char cs[128] = { '\0' };
317 uint32_t line;
b75a7d8f
A
318 UBool quoted = FALSE;
319 UCHARBUF *ucbuf=NULL;
320 UChar32 c = 0;
321 const char* cp = NULL;
322 UChar *pTarget = NULL;
323 UChar *target = NULL;
324 UChar *targetLimit = NULL;
325 int32_t size = 0;
326
729e4ab9 327 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
b75a7d8f
A
328
329 if(isVerbose()){
374ca955 330 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
331 }
332
333 if (U_FAILURE(*status))
334 {
335 return NULL;
336 }
337 /* make the filename including the directory */
729e4ab9 338 if (state->inputdir != NULL)
b75a7d8f 339 {
729e4ab9 340 uprv_strcat(filename, state->inputdir);
b75a7d8f 341
729e4ab9 342 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
343 {
344 uprv_strcat(filename, U_FILE_SEP_STRING);
345 }
346 }
347
348 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
349
729e4ab9 350 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
351
352 if (U_FAILURE(*status))
353 {
354 return NULL;
355 }
b75a7d8f
A
356 uprv_strcat(filename, cs);
357
57a6839d 358 if(state->omitCollationRules) {
46f4442e
A
359 return res_none();
360 }
b75a7d8f
A
361
362 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
363
364 if (U_FAILURE(*status)) {
0f5d89e8 365 error(line, "An error occurred while opening the input file %s\n", filename);
b75a7d8f
A
366 return NULL;
367 }
368
369 /* We allocate more space than actually required
370 * since the actual size needed for storing UChars
371 * is not known in UTF-8 byte stream
372 */
73c04bcf 373 size = ucbuf_size(ucbuf) + 1;
b75a7d8f 374 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
374ca955 375 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
b75a7d8f
A
376 target = pTarget;
377 targetLimit = pTarget+size;
378
379 /* read the rules into the buffer */
380 while (target < targetLimit)
381 {
382 c = ucbuf_getc(ucbuf, status);
383 if(c == QUOTE) {
73c04bcf 384 quoted = (UBool)!quoted;
b75a7d8f
A
385 }
386 /* weiv (06/26/2002): adding the following:
387 * - preserving spaces in commands [...]
388 * - # comments until the end of line
389 */
374ca955 390 if (c == STARTCOMMAND && !quoted)
b75a7d8f 391 {
73c04bcf
A
392 /* preserve commands
393 * closing bracket will be handled by the
394 * append at the end of the loop
395 */
396 while(c != ENDCOMMAND) {
51004dcb 397 U_APPEND_CHAR32_ONLY(c, target);
73c04bcf
A
398 c = ucbuf_getc(ucbuf, status);
399 }
400 }
401 else if (c == HASH && !quoted) {
402 /* skip comments */
403 while(c != CR && c != LF) {
404 c = ucbuf_getc(ucbuf, status);
405 }
406 continue;
407 }
408 else if (c == ESCAPE)
b75a7d8f
A
409 {
410 c = unescape(ucbuf, status);
411
4388f060 412 if (c == (UChar32)U_ERR)
b75a7d8f
A
413 {
414 uprv_free(pTarget);
415 T_FileStream_close(file);
416 return NULL;
417 }
418 }
374ca955 419 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
b75a7d8f 420 {
73c04bcf
A
421 /* ignore spaces carriage returns
422 * and line feed unless in the form \uXXXX
b75a7d8f
A
423 */
424 continue;
425 }
426
427 /* Append UChar * after dissembling if c > 0xffff*/
4388f060 428 if (c != (UChar32)U_EOF)
b75a7d8f 429 {
51004dcb 430 U_APPEND_CHAR32_ONLY(c, target);
b75a7d8f
A
431 }
432 else
433 {
434 break;
435 }
436 }
437
374ca955
A
438 /* terminate the string */
439 if(target < targetLimit){
440 *target = 0x0000;
441 }
442
729e4ab9 443 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
374ca955 444
b75a7d8f
A
445
446 ucbuf_close(ucbuf);
447 uprv_free(pTarget);
448 T_FileStream_close(file);
449
450 return result;
451}
452
73c04bcf 453static struct SResource *
4388f060 454parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
73c04bcf
A
455{
456 struct SResource *result = NULL;
457 struct UString *tokenValue;
458 FileStream *file = NULL;
459 char filename[256] = { '\0' };
460 char cs[128] = { '\0' };
461 uint32_t line;
462 UCHARBUF *ucbuf=NULL;
463 const char* cp = NULL;
464 UChar *pTarget = NULL;
465 const UChar *pSource = NULL;
466 int32_t size = 0;
467
729e4ab9 468 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
469
470 if(isVerbose()){
471 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
472 }
473
474 if (U_FAILURE(*status))
475 {
476 return NULL;
477 }
478 /* make the filename including the directory */
729e4ab9 479 if (state->inputdir != NULL)
73c04bcf 480 {
729e4ab9 481 uprv_strcat(filename, state->inputdir);
73c04bcf 482
729e4ab9 483 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
484 {
485 uprv_strcat(filename, U_FILE_SEP_STRING);
486 }
487 }
488
489 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
490
729e4ab9 491 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
492
493 if (U_FAILURE(*status))
494 {
495 return NULL;
496 }
497 uprv_strcat(filename, cs);
498
499
500 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
501
502 if (U_FAILURE(*status)) {
0f5d89e8 503 error(line, "An error occurred while opening the input file %s\n", filename);
73c04bcf
A
504 return NULL;
505 }
506
507 /* We allocate more space than actually required
508 * since the actual size needed for storing UChars
509 * is not known in UTF-8 byte stream
510 */
511 pSource = ucbuf_getBuffer(ucbuf, &size, status);
512 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
513 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
514
515#if !UCONFIG_NO_TRANSLITERATION
516 size = utrans_stripRules(pSource, size, pTarget, status);
517#else
46f4442e 518 size = 0;
73c04bcf
A
519 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
520#endif
729e4ab9 521 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
73c04bcf
A
522
523 ucbuf_close(ucbuf);
524 uprv_free(pTarget);
525 T_FileStream_close(file);
526
527 return result;
528}
2ca993e8 529static ArrayResource* dependencyArray = NULL;
73c04bcf
A
530
531static struct SResource *
729e4ab9 532parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
73c04bcf
A
533{
534 struct SResource *result = NULL;
535 struct SResource *elem = NULL;
536 struct UString *tokenValue;
537 uint32_t line;
538 char filename[256] = { '\0' };
539 char cs[128] = { '\0' };
3d1f044b 540
729e4ab9 541 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
73c04bcf
A
542
543 if(isVerbose()){
544 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
545 }
546
547 if (U_FAILURE(*status))
548 {
549 return NULL;
550 }
551 /* make the filename including the directory */
729e4ab9 552 if (state->outputdir != NULL)
73c04bcf 553 {
729e4ab9 554 uprv_strcat(filename, state->outputdir);
73c04bcf 555
729e4ab9 556 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
73c04bcf
A
557 {
558 uprv_strcat(filename, U_FILE_SEP_STRING);
559 }
560 }
3d1f044b 561
73c04bcf
A
562 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
563
564 if (U_FAILURE(*status))
565 {
566 return NULL;
567 }
568 uprv_strcat(filename, cs);
569 if(!T_FileStream_file_exists(filename)){
570 if(isStrict()){
571 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
572 }else{
3d1f044b 573 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
73c04bcf
A
574 }
575 }
576 if(dependencyArray==NULL){
729e4ab9 577 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
73c04bcf
A
578 }
579 if(tag!=NULL){
729e4ab9 580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf 581 }
729e4ab9 582 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
73c04bcf 583
2ca993e8 584 dependencyArray->add(elem);
73c04bcf
A
585
586 if (U_FAILURE(*status))
587 {
588 return NULL;
589 }
729e4ab9 590 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
73c04bcf
A
591 return result;
592}
b75a7d8f 593static struct SResource *
729e4ab9 594parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
595{
596 struct UString *tokenValue;
597 struct SResource *result = NULL;
598
73c04bcf 599/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
b75a7d8f
A
600 {
601 return parseUCARules(tag, startline, status);
73c04bcf 602 }*/
b75a7d8f 603 if(isVerbose()){
374ca955 604 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 605 }
729e4ab9 606 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
607
608 if (U_SUCCESS(*status))
609 {
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
612
729e4ab9 613 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 614 if(U_SUCCESS(*status) && result) {
729e4ab9 615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f 616
46f4442e
A
617 if (U_FAILURE(*status))
618 {
619 res_close(result);
620 return NULL;
621 }
b75a7d8f
A
622 }
623 }
624
625 return result;
626}
627
628static struct SResource *
729e4ab9 629parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
630{
631 struct UString *tokenValue;
374ca955 632 struct SResource *result = NULL;
b75a7d8f 633
729e4ab9 634 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
b75a7d8f
A
635
636 if(isVerbose()){
374ca955 637 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
638 }
639
640 if (U_SUCCESS(*status))
641 {
642 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
643 doesn't survive expect either) */
644
729e4ab9 645 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
b75a7d8f 646
729e4ab9 647 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
648
649 if (U_FAILURE(*status))
650 {
46f4442e 651 res_close(result);
b75a7d8f
A
652 return NULL;
653 }
654 }
655
656 return result;
657}
658
57a6839d
A
659#if !UCONFIG_NO_COLLATION
660
661namespace {
729e4ab9
A
662
663static struct SResource* resLookup(struct SResource* res, const char* key){
2ca993e8 664 if (res == res_none() || !res->isTable()) {
729e4ab9
A
665 return NULL;
666 }
667
2ca993e8
A
668 TableResource *list = static_cast<TableResource *>(res);
669 SResource *current = list->fFirst;
729e4ab9
A
670 while (current != NULL) {
671 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
672 return current;
673 }
674 current = current->fNext;
675 }
676 return NULL;
677}
678
57a6839d
A
679class GenrbImporter : public icu::CollationRuleParser::Importer {
680public:
681 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
682 virtual ~GenrbImporter();
b331163b 683 virtual void getRules(
57a6839d 684 const char *localeID, const char *collationType,
b331163b 685 UnicodeString &rules,
57a6839d
A
686 const char *&errorReason, UErrorCode &errorCode);
687
688private:
689 const char *inputDir;
690 const char *outputDir;
57a6839d
A
691};
692
693GenrbImporter::~GenrbImporter() {}
694
b331163b 695void
57a6839d
A
696GenrbImporter::getRules(
697 const char *localeID, const char *collationType,
b331163b 698 UnicodeString &rules,
57a6839d 699 const char *& /*errorReason*/, UErrorCode &errorCode) {
2ca993e8
A
700 CharString filename(localeID, errorCode);
701 for(int32_t i = 0; i < filename.length(); i++){
729e4ab9 702 if(filename[i] == '-'){
2ca993e8 703 filename.data()[i] = '_';
729e4ab9
A
704 }
705 }
2ca993e8 706 filename.append(".txt", errorCode);
57a6839d 707 if (U_FAILURE(errorCode)) {
b331163b 708 return;
729e4ab9 709 }
2ca993e8
A
710 CharString inputDirBuf;
711 CharString openFileName;
57a6839d 712 if(inputDir == NULL) {
2ca993e8 713 const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
729e4ab9
A
714 if (filenameBegin != NULL) {
715 /*
716 * When a filename ../../../data/root.txt is specified,
717 * we presume that the input directory is ../../../data
718 * This is very important when the resource file includes
719 * another file, like UCARules.txt or thaidict.brk.
720 */
2ca993e8
A
721 StringPiece dir = filename.toStringPiece();
722 const char *filenameLimit = filename.data() + filename.length();
723 dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
724 inputDirBuf.append(dir, errorCode);
725 inputDir = inputDirBuf.data();
729e4ab9
A
726 }
727 }else{
2ca993e8 728 int32_t dirlen = (int32_t)uprv_strlen(inputDir);
729e4ab9 729
2ca993e8 730 if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
729e4ab9
A
731 /*
732 * append the input dir to openFileName if the first char in
2ca993e8 733 * filename is not file separator char and the last char input directory is not '.'.
729e4ab9
A
734 * This is to support :
735 * genrb -s. /home/icu/data
736 * genrb -s. icu/data
737 * The user cannot mix notations like
738 * genrb -s. /icu/data --- the absolute path specified. -s redundant
739 * user should use
740 * genrb -s. icu/data --- start from CWD and look in icu/data dir
741 */
2ca993e8
A
742 openFileName.append(inputDir, dirlen, errorCode);
743 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
744 openFileName.append(U_FILE_SEP_CHAR, errorCode);
729e4ab9 745 }
729e4ab9
A
746 }
747 }
2ca993e8
A
748 openFileName.append(filename, errorCode);
749 if(U_FAILURE(errorCode)) {
750 return;
751 }
752 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
753 const char* cp = "";
754 LocalUCHARBUFPointer ucbuf(
755 ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
57a6839d 756 if(errorCode == U_FILE_ACCESS_ERROR) {
2ca993e8
A
757 fprintf(stderr, "couldn't open file %s\n", openFileName.data());
758 return;
729e4ab9 759 }
2ca993e8 760 if (ucbuf.isNull() || U_FAILURE(errorCode)) {
0f5d89e8 761 fprintf(stderr, "An error occurred processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
2ca993e8 762 return;
729e4ab9
A
763 }
764
765 /* Parse the data into an SRBRoot */
0f5d89e8
A
766 LocalPointer<SRBRoot> data(
767 parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode));
b331163b 768 if (U_FAILURE(errorCode)) {
2ca993e8 769 return;
b331163b 770 }
729e4ab9 771
2ca993e8
A
772 struct SResource *root = data->fRoot;
773 struct SResource *collations = resLookup(root, "collations");
4388f060 774 if (collations != NULL) {
2ca993e8 775 struct SResource *collation = resLookup(collations, collationType);
4388f060 776 if (collation != NULL) {
2ca993e8
A
777 struct SResource *sequence = resLookup(collation, "Sequence");
778 if (sequence != NULL && sequence->isString()) {
b331163b 779 // No string pointer aliasing so that we need not hold onto the resource bundle.
2ca993e8
A
780 StringResource *sr = static_cast<StringResource *>(sequence);
781 rules = sr->fString;
4388f060
A
782 }
783 }
784 }
729e4ab9
A
785}
786
4388f060
A
787// Quick-and-dirty escaping function.
788// Assumes that we are on an ASCII-based platform.
789static void
790escape(const UChar *s, char *buffer) {
791 int32_t length = u_strlen(s);
792 int32_t i = 0;
793 for (;;) {
794 UChar32 c;
795 U16_NEXT(s, i, length, c);
796 if (c == 0) {
797 *buffer = 0;
798 return;
799 } else if (0x20 <= c && c <= 0x7e) {
800 // printable ASCII
801 *buffer++ = (char)c; // assumes ASCII-based platform
802 } else {
803 buffer += sprintf(buffer, "\\u%04X", (int)c);
804 }
805 }
806}
807
57a6839d
A
808} // namespace
809
810#endif // !UCONFIG_NO_COLLATION
811
2ca993e8
A
812static TableResource *
813addCollation(ParseState* state, TableResource *result, const char *collationType,
57a6839d 814 uint32_t startline, UErrorCode *status)
b75a7d8f 815{
57a6839d 816 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
b75a7d8f
A
817 struct SResource *member = NULL;
818 struct UString *tokenValue;
374ca955 819 struct UString comment;
b75a7d8f
A
820 enum ETokenType token;
821 char subtag[1024];
57a6839d
A
822 UnicodeString rules;
823 UBool haveRules = FALSE;
b75a7d8f 824 UVersionInfo version;
b75a7d8f 825 uint32_t line;
57a6839d 826
b75a7d8f 827 /* '{' . (name resource)* '}' */
374ca955
A
828 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
829
b75a7d8f
A
830 for (;;)
831 {
374ca955 832 ustr_init(&comment);
729e4ab9 833 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
834
835 if (token == TOK_CLOSE_BRACE)
836 {
57a6839d 837 break;
b75a7d8f
A
838 }
839
840 if (token != TOK_STRING)
841 {
46f4442e 842 res_close(result);
b75a7d8f
A
843 *status = U_INVALID_FORMAT_ERROR;
844
845 if (token == TOK_EOF)
846 {
847 error(startline, "unterminated table");
848 }
849 else
850 {
851 error(line, "Unexpected token %s", tokenNames[token]);
852 }
853
854 return NULL;
855 }
856
857 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
858
859 if (U_FAILURE(*status))
860 {
46f4442e 861 res_close(result);
b75a7d8f
A
862 return NULL;
863 }
864
729e4ab9 865 member = parseResource(state, subtag, NULL, status);
b75a7d8f
A
866
867 if (U_FAILURE(*status))
868 {
46f4442e 869 res_close(result);
b75a7d8f
A
870 return NULL;
871 }
57a6839d
A
872 if (result == NULL)
873 {
874 // Ignore the parsed resources, continue parsing.
875 }
2ca993e8 876 else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
b75a7d8f 877 {
2ca993e8 878 StringResource *sr = static_cast<StringResource *>(member);
b75a7d8f 879 char ver[40];
2ca993e8 880 int32_t length = sr->length();
b75a7d8f 881
2ca993e8 882 if (length >= UPRV_LENGTHOF(ver))
b75a7d8f 883 {
2ca993e8 884 length = UPRV_LENGTHOF(ver) - 1;
b75a7d8f
A
885 }
886
2ca993e8 887 sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
b75a7d8f
A
888 u_versionFromString(version, ver);
889
2ca993e8 890 result->add(member, line, *status);
57a6839d 891 member = NULL;
b75a7d8f
A
892 }
893 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
894 {
895 /* discard duplicate %%CollationBin if any*/
896 }
2ca993e8 897 else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
b75a7d8f 898 {
2ca993e8
A
899 StringResource *sr = static_cast<StringResource *>(member);
900 rules = sr->fString;
57a6839d
A
901 haveRules = TRUE;
902 // Defer building the collator until we have seen
903 // all sub-elements of the collation table, including the Version.
729e4ab9
A
904 /* in order to achieve smaller data files, we can direct genrb */
905 /* to omit collation rules */
57a6839d 906 if(!state->omitCollationRules) {
2ca993e8 907 result->add(member, line, *status);
57a6839d 908 member = NULL;
729e4ab9 909 }
b75a7d8f 910 }
57a6839d
A
911 else // Just copy non-special items.
912 {
2ca993e8 913 result->add(member, line, *status);
57a6839d
A
914 member = NULL;
915 }
916 res_close(member); // TODO: use LocalPointer
b75a7d8f
A
917 if (U_FAILURE(*status))
918 {
46f4442e 919 res_close(result);
b75a7d8f
A
920 return NULL;
921 }
922 }
923
57a6839d
A
924 if (!haveRules) { return result; }
925
926#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
927 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
928 (void)collationType;
929#else
b331163b
A
930 // CLDR ticket #3949, ICU ticket #8082:
931 // Do not build collation binary data for for-import-only "private" collation rule strings.
932 if (uprv_strncmp(collationType, "private-", 8) == 0) {
933 if(isVerbose()) {
934 printf("Not building %s~%s collation binary\n", state->filename, collationType);
935 }
936 return result;
937 }
938
57a6839d
A
939 if(!state->makeBinaryCollation) {
940 if(isVerbose()) {
941 printf("Not building %s~%s collation binary\n", state->filename, collationType);
942 }
943 return result;
944 }
945 UErrorCode intStatus = U_ZERO_ERROR;
946 UParseError parseError;
947 uprv_memset(&parseError, 0, sizeof(parseError));
948 GenrbImporter importer(state->inputdir, state->outputdir);
949 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
950 if(U_FAILURE(intStatus)) {
951 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
952 res_close(result);
953 return NULL; // TODO: use LocalUResourceBundlePointer for result
954 }
955 icu::CollationBuilder builder(base, intStatus);
956 if(uprv_strncmp(collationType, "search", 6) == 0) {
957 builder.disableFastLatin(); // build fast-Latin table unless search collator
958 }
959 LocalPointer<icu::CollationTailoring> t(
960 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
961 if(U_FAILURE(intStatus)) {
962 const char *reason = builder.getErrorReason();
963 if(reason == NULL) { reason = ""; }
964 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
965 state->filename, collationType,
966 (long)parseError.offset, u_errorName(intStatus), reason);
967 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
968 // Print pre- and post-context.
969 char preBuffer[100], postBuffer[100];
970 escape(parseError.preContext, preBuffer);
971 escape(parseError.postContext, postBuffer);
972 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
973 }
2ca993e8 974 if(isStrict() || t.isNull()) {
57a6839d
A
975 *status = intStatus;
976 res_close(result);
977 return NULL;
978 }
979 }
980 icu::LocalMemory<uint8_t> buffer;
981 int32_t capacity = 100000;
982 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
983 if(dest == NULL) {
984 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
985 (long)capacity);
986 *status = U_MEMORY_ALLOCATION_ERROR;
987 res_close(result);
988 return NULL;
989 }
990 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
991 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
992 *t, *t->settings, indexes, dest, capacity, intStatus);
993 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
994 intStatus = U_ZERO_ERROR;
995 capacity = totalSize;
996 dest = buffer.allocateInsteadAndCopy(capacity);
997 if(dest == NULL) {
998 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
999 (long)capacity);
1000 *status = U_MEMORY_ALLOCATION_ERROR;
1001 res_close(result);
1002 return NULL;
1003 }
1004 totalSize = icu::CollationDataWriter::writeTailoring(
1005 *t, *t->settings, indexes, dest, capacity, intStatus);
1006 }
1007 if(U_FAILURE(intStatus)) {
1008 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1009 u_errorName(intStatus));
1010 res_close(result);
1011 return NULL;
1012 }
1013 if(isVerbose()) {
1014 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1015 icu::CollationInfo::printSizes(totalSize, indexes);
b331163b
A
1016 if(t->settings->hasReordering()) {
1017 printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1018 icu::CollationInfo::printReorderRanges(
1019 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1020 }
3d1f044b
A
1021#if 0 // debugging output
1022 } else {
1023 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1024 icu::CollationInfo::printSizes(totalSize, indexes);
1025#endif
57a6839d
A
1026 }
1027 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
2ca993e8 1028 result->add(collationBin, line, *status);
57a6839d
A
1029 if (U_FAILURE(*status)) {
1030 res_close(result);
1031 return NULL;
1032 }
1033#endif
1034 return result;
1035}
1036
1037static UBool
b331163b
A
1038keepCollationType(const char * /*type*/) {
1039 return TRUE;
b75a7d8f
A
1040}
1041
374ca955 1042static struct SResource *
729e4ab9 1043parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
374ca955 1044{
2ca993e8 1045 TableResource *result = NULL;
374ca955 1046 struct SResource *member = NULL;
374ca955
A
1047 struct UString *tokenValue;
1048 struct UString comment;
1049 enum ETokenType token;
1050 char subtag[1024], typeKeyword[1024];
1051 uint32_t line;
1052
729e4ab9 1053 result = table_open(state->bundle, tag, NULL, status);
374ca955
A
1054
1055 if (result == NULL || U_FAILURE(*status))
1056 {
1057 return NULL;
1058 }
1059 if(isVerbose()){
1060 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1061 }
1062 if(!newCollation) {
57a6839d 1063 return addCollation(state, result, "(no type)", startline, status);
73c04bcf
A
1064 }
1065 else {
1066 for(;;) {
1067 ustr_init(&comment);
729e4ab9 1068 token = getToken(state, &tokenValue, &comment, &line, status);
374ca955 1069
73c04bcf 1070 if (token == TOK_CLOSE_BRACE)
374ca955 1071 {
73c04bcf 1072 return result;
374ca955 1073 }
73c04bcf
A
1074
1075 if (token != TOK_STRING)
374ca955 1076 {
46f4442e 1077 res_close(result);
73c04bcf
A
1078 *status = U_INVALID_FORMAT_ERROR;
1079
1080 if (token == TOK_EOF)
1081 {
1082 error(startline, "unterminated table");
1083 }
1084 else
1085 {
1086 error(line, "Unexpected token %s", tokenNames[token]);
1087 }
1088
1089 return NULL;
374ca955
A
1090 }
1091
73c04bcf 1092 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
374ca955 1093
73c04bcf
A
1094 if (U_FAILURE(*status))
1095 {
46f4442e 1096 res_close(result);
73c04bcf
A
1097 return NULL;
1098 }
374ca955 1099
73c04bcf
A
1100 if (uprv_strcmp(subtag, "default") == 0)
1101 {
729e4ab9 1102 member = parseResource(state, subtag, NULL, status);
374ca955 1103
73c04bcf
A
1104 if (U_FAILURE(*status))
1105 {
46f4442e 1106 res_close(result);
73c04bcf
A
1107 return NULL;
1108 }
374ca955 1109
2ca993e8 1110 result->add(member, line, *status);
73c04bcf
A
1111 }
1112 else
1113 {
729e4ab9 1114 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
73c04bcf
A
1115 /* this probably needs to be refactored or recursively use the parser */
1116 /* first we assume that our collation table won't have the explicit type */
1117 /* then, we cannot handle aliases */
1118 if(token == TOK_OPEN_BRACE) {
729e4ab9 1119 token = getToken(state, &tokenValue, &comment, &line, status);
2ca993e8 1120 TableResource *collationRes;
57a6839d
A
1121 if (keepCollationType(subtag)) {
1122 collationRes = table_open(state->bundle, subtag, NULL, status);
1123 } else {
1124 collationRes = NULL;
1125 }
1126 // need to parse the collation data regardless
1127 collationRes = addCollation(state, collationRes, subtag, startline, status);
1128 if (collationRes != NULL) {
2ca993e8 1129 result->add(collationRes, startline, *status);
729e4ab9 1130 }
73c04bcf
A
1131 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1132 /* we could have a table too */
729e4ab9 1133 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
73c04bcf
A
1134 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1135 if(uprv_strcmp(typeKeyword, "alias") == 0) {
729e4ab9 1136 member = parseResource(state, subtag, NULL, status);
73c04bcf
A
1137 if (U_FAILURE(*status))
1138 {
46f4442e 1139 res_close(result);
73c04bcf
A
1140 return NULL;
1141 }
374ca955 1142
2ca993e8 1143 result->add(member, line, *status);
73c04bcf 1144 } else {
46f4442e 1145 res_close(result);
73c04bcf
A
1146 *status = U_INVALID_FORMAT_ERROR;
1147 return NULL;
1148 }
1149 } else {
46f4442e 1150 res_close(result);
73c04bcf
A
1151 *status = U_INVALID_FORMAT_ERROR;
1152 return NULL;
1153 }
374ca955 1154 }
374ca955 1155
73c04bcf 1156 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
374ca955 1157
73c04bcf 1158 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
374ca955 1159
73c04bcf
A
1160 if (U_FAILURE(*status))
1161 {
46f4442e 1162 res_close(result);
73c04bcf
A
1163 return NULL;
1164 }
374ca955 1165 }
374ca955
A
1166 }
1167}
1168
b75a7d8f
A
1169/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1170 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1171static struct SResource *
2ca993e8 1172realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
b75a7d8f
A
1173{
1174 struct SResource *member = NULL;
1175 struct UString *tokenValue=NULL;
374ca955 1176 struct UString comment;
b75a7d8f
A
1177 enum ETokenType token;
1178 char subtag[1024];
1179 uint32_t line;
1180 UBool readToken = FALSE;
1181
1182 /* '{' . (name resource)* '}' */
4388f060 1183
b75a7d8f 1184 if(isVerbose()){
374ca955 1185 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1186 }
1187 for (;;)
1188 {
374ca955 1189 ustr_init(&comment);
729e4ab9 1190 token = getToken(state, &tokenValue, &comment, &line, status);
b75a7d8f
A
1191
1192 if (token == TOK_CLOSE_BRACE)
1193 {
1194 if (!readToken) {
1195 warning(startline, "Encountered empty table");
1196 }
1197 return table;
1198 }
1199
1200 if (token != TOK_STRING)
1201 {
b75a7d8f
A
1202 *status = U_INVALID_FORMAT_ERROR;
1203
1204 if (token == TOK_EOF)
1205 {
1206 error(startline, "unterminated table");
1207 }
1208 else
1209 {
374ca955 1210 error(line, "unexpected token %s", tokenNames[token]);
b75a7d8f
A
1211 }
1212
1213 return NULL;
1214 }
1215
374ca955
A
1216 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1217 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1218 } else {
1219 *status = U_INVALID_FORMAT_ERROR;
1220 error(line, "invariant characters required for table keys");
374ca955
A
1221 return NULL;
1222 }
b75a7d8f
A
1223
1224 if (U_FAILURE(*status))
1225 {
729e4ab9 1226 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
b75a7d8f
A
1227 return NULL;
1228 }
1229
729e4ab9 1230 member = parseResource(state, subtag, &comment, status);
b75a7d8f
A
1231
1232 if (member == NULL || U_FAILURE(*status))
1233 {
729e4ab9 1234 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
b75a7d8f
A
1235 return NULL;
1236 }
1237
2ca993e8 1238 table->add(member, line, *status);
b75a7d8f
A
1239
1240 if (U_FAILURE(*status))
1241 {
729e4ab9 1242 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
b75a7d8f
A
1243 return NULL;
1244 }
1245 readToken = TRUE;
46f4442e 1246 ustr_deinit(&comment);
4388f060 1247 }
b75a7d8f
A
1248
1249 /* not reached */
1250 /* A compiler warning will appear if all paths don't contain a return statement. */
1251/* *status = U_INTERNAL_PROGRAM_ERROR;
1252 return NULL;*/
1253}
1254
1255static struct SResource *
729e4ab9 1256parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1257{
b75a7d8f
A
1258 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1259 {
729e4ab9 1260 return parseCollationElements(state, tag, startline, FALSE, status);
374ca955
A
1261 }
1262 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1263 {
729e4ab9 1264 return parseCollationElements(state, tag, startline, TRUE, status);
b75a7d8f
A
1265 }
1266 if(isVerbose()){
374ca955 1267 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1268 }
374ca955 1269
2ca993e8 1270 TableResource *result = table_open(state->bundle, tag, comment, status);
b75a7d8f
A
1271
1272 if (result == NULL || U_FAILURE(*status))
1273 {
1274 return NULL;
1275 }
729e4ab9 1276 return realParseTable(state, result, tag, startline, status);
b75a7d8f
A
1277}
1278
1279static struct SResource *
729e4ab9 1280parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1281{
b75a7d8f
A
1282 struct SResource *member = NULL;
1283 struct UString *tokenValue;
374ca955 1284 struct UString memberComments;
b75a7d8f
A
1285 enum ETokenType token;
1286 UBool readToken = FALSE;
1287
2ca993e8 1288 ArrayResource *result = array_open(state->bundle, tag, comment, status);
b75a7d8f
A
1289
1290 if (result == NULL || U_FAILURE(*status))
1291 {
1292 return NULL;
1293 }
1294 if(isVerbose()){
374ca955 1295 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1296 }
374ca955
A
1297
1298 ustr_init(&memberComments);
1299
b75a7d8f
A
1300 /* '{' . resource [','] '}' */
1301 for (;;)
1302 {
374ca955
A
1303 /* reset length */
1304 ustr_setlen(&memberComments, 0, status);
1305
b75a7d8f 1306 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1307 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
374ca955 1308
b75a7d8f
A
1309
1310 if (token == TOK_CLOSE_BRACE)
1311 {
729e4ab9 1312 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1313 if (!readToken) {
1314 warning(startline, "Encountered empty array");
1315 }
1316 break;
1317 }
1318
1319 if (token == TOK_EOF)
1320 {
46f4442e 1321 res_close(result);
b75a7d8f
A
1322 *status = U_INVALID_FORMAT_ERROR;
1323 error(startline, "unterminated array");
1324 return NULL;
1325 }
1326
1327 /* string arrays are a special case */
1328 if (token == TOK_STRING)
1329 {
729e4ab9
A
1330 getToken(state, &tokenValue, &memberComments, NULL, status);
1331 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
b75a7d8f
A
1332 }
1333 else
1334 {
729e4ab9 1335 member = parseResource(state, NULL, &memberComments, status);
b75a7d8f
A
1336 }
1337
1338 if (member == NULL || U_FAILURE(*status))
1339 {
46f4442e 1340 res_close(result);
b75a7d8f
A
1341 return NULL;
1342 }
1343
2ca993e8 1344 result->add(member);
b75a7d8f
A
1345
1346 /* eat optional comma if present */
729e4ab9 1347 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1348
1349 if (token == TOK_COMMA)
1350 {
729e4ab9 1351 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1352 }
1353
1354 if (U_FAILURE(*status))
1355 {
46f4442e 1356 res_close(result);
b75a7d8f
A
1357 return NULL;
1358 }
1359 readToken = TRUE;
1360 }
1361
46f4442e 1362 ustr_deinit(&memberComments);
b75a7d8f
A
1363 return result;
1364}
1365
1366static struct SResource *
729e4ab9 1367parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1368{
b75a7d8f
A
1369 enum ETokenType token;
1370 char *string;
1371 int32_t value;
1372 UBool readToken = FALSE;
b75a7d8f 1373 char *stopstring;
374ca955 1374 struct UString memberComments;
b75a7d8f 1375
2ca993e8 1376 IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
b75a7d8f
A
1377
1378 if (result == NULL || U_FAILURE(*status))
1379 {
1380 return NULL;
1381 }
1382
1383 if(isVerbose()){
374ca955 1384 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f 1385 }
374ca955 1386 ustr_init(&memberComments);
b75a7d8f
A
1387 /* '{' . string [','] '}' */
1388 for (;;)
1389 {
374ca955
A
1390 ustr_setlen(&memberComments, 0, status);
1391
b75a7d8f 1392 /* check for end of array, but don't consume next token unless it really is the end */
729e4ab9 1393 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
b75a7d8f
A
1394
1395 if (token == TOK_CLOSE_BRACE)
1396 {
1397 /* it's the end, consume the close brace */
729e4ab9 1398 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1399 if (!readToken) {
1400 warning(startline, "Encountered empty int vector");
1401 }
46f4442e 1402 ustr_deinit(&memberComments);
b75a7d8f
A
1403 return result;
1404 }
1405
340931cb
A
1406 int32_t stringLength;
1407 string = getInvariantString(state, NULL, NULL, stringLength, status);
b75a7d8f
A
1408
1409 if (U_FAILURE(*status))
1410 {
46f4442e 1411 res_close(result);
b75a7d8f
A
1412 return NULL;
1413 }
b75a7d8f 1414
46f4442e 1415 /* For handling illegal char in the Intvector */
b75a7d8f 1416 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
340931cb 1417 int32_t len = (int32_t)(stopstring-string);
b75a7d8f 1418
340931cb 1419 if(len==stringLength)
b75a7d8f 1420 {
2ca993e8 1421 result->add(value, *status);
b75a7d8f 1422 uprv_free(string);
729e4ab9 1423 token = peekToken(state, 0, NULL, NULL, NULL, status);
b75a7d8f
A
1424 }
1425 else
1426 {
1427 uprv_free(string);
1428 *status=U_INVALID_CHAR_FOUND;
1429 }
b75a7d8f
A
1430
1431 if (U_FAILURE(*status))
1432 {
46f4442e 1433 res_close(result);
b75a7d8f
A
1434 return NULL;
1435 }
1436
1437 /* the comma is optional (even though it is required to prevent the reader from concatenating
1438 consecutive entries) so that a missing comma on the last entry isn't an error */
1439 if (token == TOK_COMMA)
1440 {
729e4ab9 1441 getToken(state, NULL, NULL, NULL, status);
b75a7d8f
A
1442 }
1443 readToken = TRUE;
1444 }
1445
1446 /* not reached */
1447 /* A compiler warning will appear if all paths don't contain a return statement. */
1448/* intvector_close(result, status);
1449 *status = U_INTERNAL_PROGRAM_ERROR;
1450 return NULL;*/
1451}
1452
1453static struct SResource *
729e4ab9 1454parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f 1455{
2ca993e8 1456 uint32_t line;
340931cb
A
1457 int32_t stringLength;
1458 LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
2ca993e8 1459 if (string.isNull() || U_FAILURE(*status))
b75a7d8f
A
1460 {
1461 return NULL;
1462 }
1463
729e4ab9 1464 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1465 if (U_FAILURE(*status))
1466 {
b75a7d8f
A
1467 return NULL;
1468 }
1469
1470 if(isVerbose()){
374ca955 1471 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1472 }
1473
340931cb
A
1474 LocalMemory<uint8_t> value;
1475 int32_t count = 0;
1476 if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
1477 {
1478 *status = U_MEMORY_ALLOCATION_ERROR;
1479 return NULL;
1480 }
b75a7d8f 1481
340931cb
A
1482 char toConv[3] = {'\0', '\0', '\0'};
1483 for (int32_t i = 0; i < stringLength;)
1484 {
1485 // Skip spaces (which may have been line endings).
1486 char c0 = string[i++];
1487 if (c0 == ' ') { continue; }
1488 if (i == stringLength) {
1489 *status=U_INVALID_CHAR_FOUND;
1490 error(line, "Encountered invalid binary value (odd number of hex digits)");
1491 return NULL;
1492 }
1493 toConv[0] = c0;
1494 toConv[1] = string[i++];
b75a7d8f 1495
340931cb
A
1496 char *stopstring;
1497 value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1498 uint32_t len=(uint32_t)(stopstring-toConv);
b75a7d8f 1499
340931cb 1500 if(len!=2)
b75a7d8f 1501 {
340931cb
A
1502 *status=U_INVALID_CHAR_FOUND;
1503 error(line, "Encountered invalid binary value (not all pairs of hex digits)");
b75a7d8f
A
1504 return NULL;
1505 }
1506 }
340931cb
A
1507
1508 if (count == 0) {
2ca993e8
A
1509 warning(startline, "Encountered empty binary value");
1510 return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
340931cb
A
1511 } else {
1512 return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
b75a7d8f 1513 }
b75a7d8f
A
1514}
1515
1516static struct SResource *
729e4ab9 1517parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1518{
1519 struct SResource *result = NULL;
1520 int32_t value;
1521 char *string;
b75a7d8f 1522 char *stopstring;
b75a7d8f 1523
340931cb
A
1524 int32_t stringLength;
1525 string = getInvariantString(state, NULL, NULL, stringLength, status);
b75a7d8f
A
1526
1527 if (string == NULL || U_FAILURE(*status))
1528 {
1529 return NULL;
1530 }
1531
729e4ab9 1532 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1533
1534 if (U_FAILURE(*status))
1535 {
1536 uprv_free(string);
1537 return NULL;
1538 }
1539
1540 if(isVerbose()){
374ca955 1541 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1542 }
1543
340931cb 1544 if (stringLength == 0)
b75a7d8f
A
1545 {
1546 warning(startline, "Encountered empty integer. Default value is 0.");
1547 }
1548
46f4442e
A
1549 /* Allow integer support for hexdecimal, octal digit and decimal*/
1550 /* and handle illegal char in the integer*/
b75a7d8f 1551 value = uprv_strtoul(string, &stopstring, 0);
340931cb
A
1552 int32_t len = (int32_t)(stopstring-string);
1553 if(len==stringLength)
b75a7d8f 1554 {
729e4ab9 1555 result = int_open(state->bundle, tag, value, comment, status);
b75a7d8f
A
1556 }
1557 else
1558 {
1559 *status=U_INVALID_CHAR_FOUND;
1560 }
1561 uprv_free(string);
1562
1563 return result;
1564}
1565
1566static struct SResource *
729e4ab9 1567parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f 1568{
b75a7d8f 1569 uint32_t line;
340931cb
A
1570 int32_t stringLength;
1571 LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
b75a7d8f
A
1572 if (U_FAILURE(*status))
1573 {
1574 return NULL;
1575 }
1576
729e4ab9 1577 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1578
1579 if (U_FAILURE(*status))
1580 {
b75a7d8f
A
1581 return NULL;
1582 }
1583
1584 if(isVerbose()){
374ca955 1585 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1586 }
1587
1588 /* Open the input file for reading */
2ca993e8
A
1589 CharString fullname;
1590 if (state->inputdir != NULL) {
1591 fullname.append(state->inputdir, *status);
b75a7d8f 1592 }
2ca993e8
A
1593 fullname.appendPathPart(filename.getAlias(), *status);
1594 if (U_FAILURE(*status)) {
1595 return NULL;
b75a7d8f
A
1596 }
1597
2ca993e8 1598 FileStream *file = T_FileStream_open(fullname.data(), "rb");
b75a7d8f
A
1599 if (file == NULL)
1600 {
2ca993e8 1601 error(line, "couldn't open input file %s", filename.getAlias());
b75a7d8f
A
1602 *status = U_FILE_ACCESS_ERROR;
1603 return NULL;
1604 }
1605
2ca993e8
A
1606 int32_t len = T_FileStream_size(file);
1607 LocalMemory<uint8_t> data;
1608 if(data.allocateInsteadAndCopy(len) == NULL)
b75a7d8f
A
1609 {
1610 *status = U_MEMORY_ALLOCATION_ERROR;
1611 T_FileStream_close (file);
1612 return NULL;
1613 }
1614
2ca993e8 1615 /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
b75a7d8f
A
1616 T_FileStream_close (file);
1617
2ca993e8 1618 return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
b75a7d8f
A
1619}
1620
1621static struct SResource *
729e4ab9 1622parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
b75a7d8f
A
1623{
1624 struct SResource *result;
1625 int32_t len=0;
1626 char *filename;
1627 uint32_t line;
1628 UChar *pTarget = NULL;
1629
1630 UCHARBUF *ucbuf;
1631 char *fullname = NULL;
b75a7d8f
A
1632 const char* cp = NULL;
1633 const UChar* uBuffer = NULL;
1634
340931cb
A
1635 int32_t stringLength;
1636 filename = getInvariantString(state, &line, NULL, stringLength, status);
b75a7d8f
A
1637
1638 if (U_FAILURE(*status))
1639 {
1640 return NULL;
1641 }
1642
729e4ab9 1643 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
b75a7d8f
A
1644
1645 if (U_FAILURE(*status))
1646 {
1647 uprv_free(filename);
1648 return NULL;
1649 }
1650
1651 if(isVerbose()){
374ca955 1652 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1653 }
1654
340931cb 1655 fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
b75a7d8f
A
1656 /* test for NULL */
1657 if(fullname == NULL)
1658 {
1659 *status = U_MEMORY_ALLOCATION_ERROR;
1660 uprv_free(filename);
1661 return NULL;
374ca955 1662 }
b75a7d8f 1663
729e4ab9
A
1664 if(state->inputdir!=NULL){
1665 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
b75a7d8f
A
1666 {
1667
729e4ab9 1668 uprv_strcpy(fullname, state->inputdir);
b75a7d8f 1669
729e4ab9
A
1670 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1671 fullname[state->inputdirLength + 1] = '\0';
b75a7d8f
A
1672
1673 uprv_strcat(fullname, filename);
1674 }
1675 else
1676 {
729e4ab9 1677 uprv_strcpy(fullname, state->inputdir);
b75a7d8f
A
1678 uprv_strcat(fullname, filename);
1679 }
1680 }else{
1681 uprv_strcpy(fullname,filename);
1682 }
1683
1684 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1685
1686 if (U_FAILURE(*status)) {
1687 error(line, "couldn't open input file %s\n", filename);
1688 return NULL;
1689 }
1690
1691 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
729e4ab9 1692 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
b75a7d8f 1693
51004dcb
A
1694 ucbuf_close(ucbuf);
1695
b75a7d8f
A
1696 uprv_free(pTarget);
1697
1698 uprv_free(filename);
1699 uprv_free(fullname);
1700
1701 return result;
1702}
1703
73c04bcf
A
1704
1705
1706
1707
1708U_STRING_DECL(k_type_string, "string", 6);
1709U_STRING_DECL(k_type_binary, "binary", 6);
1710U_STRING_DECL(k_type_bin, "bin", 3);
1711U_STRING_DECL(k_type_table, "table", 5);
1712U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1713U_STRING_DECL(k_type_int, "int", 3);
1714U_STRING_DECL(k_type_integer, "integer", 7);
1715U_STRING_DECL(k_type_array, "array", 5);
1716U_STRING_DECL(k_type_alias, "alias", 5);
1717U_STRING_DECL(k_type_intvector, "intvector", 9);
1718U_STRING_DECL(k_type_import, "import", 6);
1719U_STRING_DECL(k_type_include, "include", 7);
73c04bcf
A
1720
1721/* Various non-standard processing plugins that create one or more special resources. */
1722U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1723U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1724U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1725U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1726
/*
 * Resource types recognised by the parser.
 *
 * The enumerators are used as indexes into gResourceTypes[], so their order
 * must match the order of the entries in that table. RESTYPE_UNKNOWN is the
 * "no explicit type" value and RESTYPE_RESERVED terminates the range that
 * parseResourceType() searches.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,                /* no explicit type given / not recognised */
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED                /* end marker, not a real type */
} EResourceType;
1746
1747static struct {
1748 const char *nameChars; /* only used for debugging */
1749 const UChar *nameUChars;
1750 ParseResourceFunction *parseFunction;
1751} gResourceTypes[] = {
1752 {"Unknown", NULL, NULL},
1753 {"string", k_type_string, parseString},
1754 {"binary", k_type_binary, parseBinary},
1755 {"table", k_type_table, parseTable},
1756 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1757 {"integer", k_type_integer, parseInteger},
1758 {"array", k_type_array, parseArray},
1759 {"alias", k_type_alias, parseAlias},
1760 {"intvector", k_type_intvector, parseIntVector},
1761 {"import", k_type_import, parseImport},
1762 {"include", k_type_include, parseInclude},
1763 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1764 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1765 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1766 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1767 {"reserved", NULL, NULL}
1768};
1769
57a6839d 1770void initParser()
73c04bcf 1771{
73c04bcf
A
1772 U_STRING_INIT(k_type_string, "string", 6);
1773 U_STRING_INIT(k_type_binary, "binary", 6);
1774 U_STRING_INIT(k_type_bin, "bin", 3);
1775 U_STRING_INIT(k_type_table, "table", 5);
1776 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1777 U_STRING_INIT(k_type_int, "int", 3);
1778 U_STRING_INIT(k_type_integer, "integer", 7);
1779 U_STRING_INIT(k_type_array, "array", 5);
1780 U_STRING_INIT(k_type_alias, "alias", 5);
1781 U_STRING_INIT(k_type_intvector, "intvector", 9);
1782 U_STRING_INIT(k_type_import, "import", 6);
73c04bcf
A
1783 U_STRING_INIT(k_type_include, "include", 7);
1784
1785 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1786 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1787 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1788 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
73c04bcf
A
1789}
1790
4388f060 1791static inline UBool isTable(enum EResourceType type) {
57a6839d 1792 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
73c04bcf
A
1793}
1794
1795static enum EResourceType
729e4ab9 1796parseResourceType(ParseState* state, UErrorCode *status)
73c04bcf
A
1797{
1798 struct UString *tokenValue;
1799 struct UString comment;
57a6839d 1800 enum EResourceType result = RESTYPE_UNKNOWN;
73c04bcf
A
1801 uint32_t line=0;
1802 ustr_init(&comment);
729e4ab9 1803 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
73c04bcf
A
1804
1805 if (U_FAILURE(*status))
1806 {
57a6839d 1807 return RESTYPE_UNKNOWN;
73c04bcf
A
1808 }
1809
1810 *status = U_ZERO_ERROR;
1811
1812 /* Search for normal types */
57a6839d
A
1813 result=RESTYPE_UNKNOWN;
1814 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
73c04bcf
A
1815 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1816 break;
1817 }
1818 }
1819 /* Now search for the aliases */
1820 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
57a6839d 1821 result = RESTYPE_INTEGER;
73c04bcf
A
1822 }
1823 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
57a6839d 1824 result = RESTYPE_BINARY;
73c04bcf 1825 }
57a6839d 1826 else if (result == RESTYPE_RESERVED) {
73c04bcf
A
1827 char tokenBuffer[1024];
1828 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1829 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1830 *status = U_INVALID_FORMAT_ERROR;
1831 error(line, "unknown resource type '%s'", tokenBuffer);
1832 }
1833
1834 return result;
1835}
1836
1837/* parse a non-top-level resource */
b75a7d8f 1838static struct SResource *
729e4ab9 1839parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
b75a7d8f
A
1840{
1841 enum ETokenType token;
57a6839d 1842 enum EResourceType resType = RESTYPE_UNKNOWN;
73c04bcf 1843 ParseResourceFunction *parseFunction = NULL;
b75a7d8f
A
1844 struct UString *tokenValue;
1845 uint32_t startline;
1846 uint32_t line;
1847
4388f060 1848
729e4ab9 1849 token = getToken(state, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1850
1851 if(isVerbose()){
374ca955 1852 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
b75a7d8f
A
1853 }
1854
1855 /* name . [ ':' type ] '{' resource '}' */
1856 /* This function parses from the colon onwards. If the colon is present, parse the
1857 type then try to parse a resource of that type. If there is no explicit type,
1858 work it out using the lookahead tokens. */
1859 switch (token)
1860 {
1861 case TOK_EOF:
1862 *status = U_INVALID_FORMAT_ERROR;
1863 error(startline, "Unexpected EOF encountered");
1864 return NULL;
1865
1866 case TOK_ERROR:
1867 *status = U_INVALID_FORMAT_ERROR;
1868 return NULL;
1869
1870 case TOK_COLON:
729e4ab9
A
1871 resType = parseResourceType(state, status);
1872 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
b75a7d8f
A
1873
1874 if (U_FAILURE(*status))
1875 {
1876 return NULL;
1877 }
1878
1879 break;
1880
1881 case TOK_OPEN_BRACE:
1882 break;
1883
1884 default:
1885 *status = U_INVALID_FORMAT_ERROR;
1886 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1887 return NULL;
1888 }
1889
4388f060 1890
57a6839d 1891 if (resType == RESTYPE_UNKNOWN)
b75a7d8f
A
1892 {
1893 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1894 We could have any of the following:
1895 { { => array (nested)
1896 { :/} => array
1897 { string , => string array
1898
b75a7d8f
A
1899 { string { => table
1900
b75a7d8f
A
1901 { string :/{ => table
1902 { string } => string
1903 */
1904
729e4ab9 1905 token = peekToken(state, 0, NULL, &line, NULL,status);
b75a7d8f
A
1906
1907 if (U_FAILURE(*status))
1908 {
1909 return NULL;
1910 }
1911
b75a7d8f
A
1912 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1913 {
57a6839d 1914 resType = RESTYPE_ARRAY;
b75a7d8f
A
1915 }
1916 else if (token == TOK_STRING)
1917 {
729e4ab9 1918 token = peekToken(state, 1, NULL, &line, NULL, status);
b75a7d8f
A
1919
1920 if (U_FAILURE(*status))
1921 {
1922 return NULL;
1923 }
1924
1925 switch (token)
1926 {
57a6839d
A
1927 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
1928 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
1929 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
1930 case TOK_COLON: resType = RESTYPE_TABLE; break;
b75a7d8f
A
1931 default:
1932 *status = U_INVALID_FORMAT_ERROR;
1933 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1934 return NULL;
1935 }
1936 }
1937 else
1938 {
1939 *status = U_INVALID_FORMAT_ERROR;
1940 error(line, "Unexpected token after '{'");
1941 return NULL;
1942 }
1943
1944 /* printf("Type guessed as %s\n", resourceNames[resType]); */
57a6839d 1945 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
73c04bcf
A
1946 *status = U_INVALID_FORMAT_ERROR;
1947 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1948 return NULL;
b75a7d8f
A
1949 }
1950
4388f060 1951
b75a7d8f
A
1952 /* We should now know what we need to parse next, so call the appropriate parser
1953 function and return. */
73c04bcf
A
1954 parseFunction = gResourceTypes[resType].parseFunction;
1955 if (parseFunction != NULL) {
729e4ab9 1956 return parseFunction(state, tag, startline, comment, status);
73c04bcf
A
1957 }
1958 else {
b75a7d8f 1959 *status = U_INTERNAL_PROGRAM_ERROR;
73c04bcf 1960 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
b75a7d8f
A
1961 }
1962
1963 return NULL;
1964}
1965
73c04bcf 1966/* parse the top-level resource */
b75a7d8f 1967struct SRBRoot *
57a6839d
A
1968parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
1969 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
b75a7d8f
A
1970{
1971 struct UString *tokenValue;
374ca955 1972 struct UString comment;
b75a7d8f 1973 uint32_t line;
b75a7d8f
A
1974 enum EResourceType bundleType;
1975 enum ETokenType token;
729e4ab9
A
1976 ParseState state;
1977 uint32_t i;
4388f060
A
1978
1979
729e4ab9
A
1980 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1981 {
1982 ustr_init(&state.lookahead[i].value);
1983 ustr_init(&state.lookahead[i].comment);
1984 }
b75a7d8f 1985
729e4ab9 1986 initLookahead(&state, buf, status);
b75a7d8f 1987
729e4ab9
A
1988 state.inputdir = inputDir;
1989 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
1990 state.outputdir = outputDir;
1991 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
57a6839d 1992 state.filename = filename;
4388f060 1993 state.makeBinaryCollation = makeBinaryCollation;
57a6839d 1994 state.omitCollationRules = omitCollationRules;
374ca955
A
1995
1996 ustr_init(&comment);
729e4ab9 1997 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
b75a7d8f 1998
2ca993e8 1999 state.bundle = new SRBRoot(&comment, FALSE, *status);
b75a7d8f 2000
729e4ab9 2001 if (state.bundle == NULL || U_FAILURE(*status))
b75a7d8f 2002 {
3d1f044b
A
2003 delete state.bundle;
2004
b75a7d8f
A
2005 return NULL;
2006 }
2007
374ca955 2008
2ca993e8 2009 state.bundle->setLocale(tokenValue->fChars, *status);
729e4ab9 2010
b75a7d8f 2011 /* The following code is to make Empty bundle work no matter with :table specifer or not */
729e4ab9 2012 token = getToken(&state, NULL, NULL, &line, status);
73c04bcf 2013 if(token==TOK_COLON) {
b75a7d8f 2014 *status=U_ZERO_ERROR;
729e4ab9 2015 bundleType=parseResourceType(&state, status);
b75a7d8f 2016
73c04bcf 2017 if(isTable(bundleType))
b75a7d8f 2018 {
729e4ab9 2019 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
b75a7d8f
A
2020 }
2021 else
2022 {
2023 *status=U_PARSE_ERROR;
4388f060 2024 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
b75a7d8f
A
2025 }
2026 }
2027 else
2028 {
73c04bcf 2029 /* not a colon */
b75a7d8f
A
2030 if(token==TOK_OPEN_BRACE)
2031 {
2032 *status=U_ZERO_ERROR;
57a6839d 2033 bundleType=RESTYPE_TABLE;
b75a7d8f
A
2034 }
2035 else
2036 {
73c04bcf
A
2037 /* neither colon nor open brace */
2038 *status=U_PARSE_ERROR;
57a6839d 2039 bundleType=RESTYPE_UNKNOWN;
b75a7d8f
A
2040 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2041 }
2042 }
b75a7d8f
A
2043
2044 if (U_FAILURE(*status))
2045 {
2ca993e8 2046 delete state.bundle;
b75a7d8f
A
2047 return NULL;
2048 }
2049
57a6839d 2050 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
73c04bcf
A
2051 /*
2052 * Parse a top-level table with the table(nofallback) declaration.
2053 * This is the same as a regular table, but also sets the
2054 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2055 */
2ca993e8 2056 state.bundle->fNoFallback=TRUE;
73c04bcf
A
2057 }
2058 /* top-level tables need not handle special table names like "collations" */
2ca993e8
A
2059 assert(!state.bundle->fIsPoolBundle);
2060 assert(state.bundle->fRoot->fType == URES_TABLE);
2061 TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
2062 realParseTable(&state, rootTable, NULL, line, status);
73c04bcf 2063 if(dependencyArray!=NULL){
2ca993e8 2064 rootTable->add(dependencyArray, 0, *status);
73c04bcf
A
2065 dependencyArray = NULL;
2066 }
4388f060 2067 if (U_FAILURE(*status))
b75a7d8f 2068 {
2ca993e8 2069 delete state.bundle;
46f4442e 2070 res_close(dependencyArray);
b75a7d8f
A
2071 return NULL;
2072 }
2073
729e4ab9 2074 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
b75a7d8f
A
2075 {
2076 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2077 if(isStrict()){
2078 *status = U_INVALID_FORMAT_ERROR;
2079 return NULL;
2080 }
2081 }
2082
729e4ab9 2083 cleanupLookahead(&state);
46f4442e 2084 ustr_deinit(&comment);
729e4ab9 2085 return state.bundle;
b75a7d8f 2086}