]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
2ca993e8 | 6 | * Copyright (C) 1998-2016, International Business Machines |
b75a7d8f A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * | |
2ca993e8 | 11 | * File genrb.cpp |
b75a7d8f A |
12 | * |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 05/25/99 stephen Creation. | |
17 | * 5/10/01 Ram removed ustdio dependency | |
18 | ******************************************************************************* | |
19 | */ | |
20 | ||
3d1f044b A |
21 | #include <fstream> |
22 | #include <iostream> | |
23 | #include <list> | |
24 | #include <string> | |
25 | ||
2ca993e8 | 26 | #include <assert.h> |
b75a7d8f | 27 | #include "genrb.h" |
2ca993e8 | 28 | #include "unicode/localpointer.h" |
374ca955 | 29 | #include "unicode/uclean.h" |
2ca993e8 A |
30 | #include "unicode/utf16.h" |
31 | #include "charstr.h" | |
32 | #include "cmemory.h" | |
3d1f044b | 33 | #include "filterrb.h" |
2ca993e8 | 34 | #include "reslist.h" |
729e4ab9 A |
35 | #include "ucmndata.h" /* TODO: for reading the pool bundle */ |
36 | ||
2ca993e8 A |
37 | U_NAMESPACE_USE |
38 | ||
b75a7d8f | 39 | /* Protos */ |
3d1f044b A |
40 | void processFile(const char *filename, const char* cp, |
41 | const char *inputDir, const char *outputDir, const char *filterDir, | |
2ca993e8 A |
42 | const char *packageName, |
43 | SRBRoot *newPoolBundle, UBool omitBinaryCollation, UErrorCode &status); | |
374ca955 | 44 | static char *make_res_filename(const char *filename, const char *outputDir, |
2ca993e8 | 45 | const char *packageName, UErrorCode &status); |
b75a7d8f A |
46 | |
47 | /* File suffixes */ | |
48 | #define RES_SUFFIX ".res" | |
49 | #define COL_SUFFIX ".col" | |
50 | ||
2ca993e8 | 51 | const char *gCurrentFileName = NULL; |
b75a7d8f A |
52 | #ifdef XP_MAC_CONSOLE |
53 | #include <console.h> | |
54 | #endif | |
55 | ||
2ca993e8 A |
56 | void ResFile::close() { |
57 | delete[] fBytes; | |
58 | fBytes = NULL; | |
59 | delete fStrings; | |
60 | fStrings = NULL; | |
61 | } | |
62 | ||
b75a7d8f A |
/*
 * Indexes into the options[] table below.
 * The order of the enumerators must match the order of the table entries.
 */
enum {
    HELP1,                /*  0 */
    HELP2,                /*  1 */
    VERBOSE,              /*  2 */
    QUIET,                /*  3 */
    VERSION,              /*  4 */
    SOURCEDIR,            /*  5 */
    DESTDIR,              /*  6 */
    ENCODING,             /*  7 */
    ICUDATADIR,           /*  8 */
    WRITE_JAVA,           /*  9 */
    COPYRIGHT,            /* 10 */
    JAVA_PACKAGE,         /* 11 */
    BUNDLE_NAME,          /* 12 */
    WRITE_XLIFF,          /* 13 */
    STRICT,               /* 14 */
    NO_BINARY_COLLATION,  /* 15 */
    LANGUAGE,             /* 16 */
    NO_COLLATION_RULES,   /* 17 */
    FORMAT_VERSION,       /* 18 */
    WRITE_POOL_BUNDLE,    /* 19 */
    USE_POOL_BUNDLE,      /* 20 */
    INCLUDE_UNIHAN_COLL,  /* 21 */
    FILTERDIR             /* 22 */
};
89 | ||
90 | UOption options[]={ | |
91 | UOPTION_HELP_H, | |
92 | UOPTION_HELP_QUESTION_MARK, | |
93 | UOPTION_VERBOSE, | |
94 | UOPTION_QUIET, | |
95 | UOPTION_VERSION, | |
96 | UOPTION_SOURCEDIR, | |
97 | UOPTION_DESTDIR, | |
98 | UOPTION_ENCODING, | |
99 | UOPTION_ICUDATADIR, | |
100 | UOPTION_WRITE_JAVA, | |
101 | UOPTION_COPYRIGHT, | |
51004dcb | 102 | UOPTION_DEF("java-package", '\x01', UOPT_REQUIRES_ARG), |
b75a7d8f | 103 | UOPTION_BUNDLE_NAME, |
729e4ab9 A |
104 | UOPTION_DEF("write-xliff", 'x', UOPT_OPTIONAL_ARG), |
105 | UOPTION_DEF("strict", 'k', UOPT_NO_ARG), /* 14 */ | |
106 | UOPTION_DEF("noBinaryCollation", 'C', UOPT_NO_ARG),/* 15 */ | |
107 | UOPTION_DEF("language", 'l', UOPT_REQUIRES_ARG), /* 16 */ | |
108 | UOPTION_DEF("omitCollationRules", 'R', UOPT_NO_ARG),/* 17 */ | |
109 | UOPTION_DEF("formatVersion", '\x01', UOPT_REQUIRES_ARG),/* 18 */ | |
3d1f044b | 110 | UOPTION_DEF("writePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 19 */ |
729e4ab9 A |
111 | UOPTION_DEF("usePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 20 */ |
112 | UOPTION_DEF("includeUnihanColl", '\x01', UOPT_NO_ARG),/* 21 */ /* temporary, don't display in usage info */ | |
3d1f044b | 113 | UOPTION_DEF("filterDir", '\x01', UOPT_OPTIONAL_ARG), /* 22 */ |
b75a7d8f A |
114 | }; |
115 | ||
116 | static UBool write_java = FALSE; | |
374ca955 | 117 | static UBool write_xliff = FALSE; |
b75a7d8f | 118 | static const char* outputEnc =""; |
729e4ab9 | 119 | |
2ca993e8 | 120 | static ResFile poolBundle; |
729e4ab9 | 121 | |
b75a7d8f A |
122 | /*added by Jing*/ |
123 | static const char* language = NULL; | |
374ca955 | 124 | static const char* xliffOutputFileName = NULL; |
b75a7d8f A |
125 | int |
126 | main(int argc, | |
127 | char* argv[]) | |
128 | { | |
129 | UErrorCode status = U_ZERO_ERROR; | |
130 | const char *arg = NULL; | |
131 | const char *outputDir = NULL; /* NULL = no output directory, use current */ | |
132 | const char *inputDir = NULL; | |
3d1f044b | 133 | const char *filterDir = NULL; |
b75a7d8f A |
134 | const char *encoding = ""; |
135 | int i; | |
51004dcb | 136 | UBool illegalArg = FALSE; |
374ca955 | 137 | |
b75a7d8f A |
138 | U_MAIN_INIT_ARGS(argc, argv); |
139 | ||
51004dcb A |
140 | options[JAVA_PACKAGE].value = "com.ibm.icu.impl.data"; |
141 | options[BUNDLE_NAME].value = "LocaleElements"; | |
2ca993e8 | 142 | argc = u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); |
b75a7d8f A |
143 | |
144 | /* error handling, printing usage message */ | |
145 | if(argc<0) { | |
146 | fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]); | |
2ca993e8 | 147 | illegalArg = TRUE; |
b75a7d8f | 148 | } else if(argc<2) { |
2ca993e8 | 149 | illegalArg = TRUE; |
b75a7d8f | 150 | } |
729e4ab9 A |
151 | if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) { |
152 | fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]); | |
2ca993e8 | 153 | illegalArg = TRUE; |
729e4ab9 A |
154 | } |
155 | if(options[FORMAT_VERSION].doesOccur) { | |
156 | const char *s = options[FORMAT_VERSION].value; | |
2ca993e8 | 157 | if(uprv_strlen(s) != 1 || (s[0] < '1' && '3' < s[0])) { |
729e4ab9 | 158 | fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s); |
2ca993e8 | 159 | illegalArg = TRUE; |
729e4ab9 A |
160 | } else if(s[0] == '1' && |
161 | (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur) | |
162 | ) { | |
163 | fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]); | |
2ca993e8 | 164 | illegalArg = TRUE; |
729e4ab9 A |
165 | } else { |
166 | setFormatVersion(s[0] - '0'); | |
167 | } | |
168 | } | |
b75a7d8f | 169 | |
2ca993e8 A |
170 | if((options[JAVA_PACKAGE].doesOccur || options[BUNDLE_NAME].doesOccur) && |
171 | !options[WRITE_JAVA].doesOccur) { | |
51004dcb A |
172 | fprintf(stderr, |
173 | "%s error: command line argument --java-package or --bundle-name " | |
174 | "without --write-java\n", | |
175 | argv[0]); | |
176 | illegalArg = TRUE; | |
177 | } | |
178 | ||
2ca993e8 A |
179 | if(options[VERSION].doesOccur) { |
180 | fprintf(stderr, | |
181 | "%s version %s (ICU version %s).\n" | |
182 | "%s\n", | |
183 | argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); | |
184 | if(!illegalArg) { | |
185 | return U_ZERO_ERROR; | |
186 | } | |
187 | } | |
188 | ||
51004dcb | 189 | if(illegalArg || options[HELP1].doesOccur || options[HELP2].doesOccur) { |
b75a7d8f | 190 | /* |
729e4ab9 | 191 | * Broken into chunks because the C89 standard says the minimum |
b75a7d8f A |
192 | * required supported string length is 509 bytes. |
193 | */ | |
194 | fprintf(stderr, | |
195 | "Usage: %s [OPTIONS] [FILES]\n" | |
196 | "\tReads the list of resource bundle source files and creates\n" | |
4388f060 | 197 | "\tbinary version of resource bundles (.res files)\n", |
b75a7d8f A |
198 | argv[0]); |
199 | fprintf(stderr, | |
200 | "Options:\n" | |
201 | "\t-h or -? or --help this usage text\n" | |
202 | "\t-q or --quiet do not display warnings\n" | |
203 | "\t-v or --verbose print extra information when processing files\n" | |
204 | "\t-V or --version prints out version number and exits\n" | |
205 | "\t-c or --copyright include copyright notice\n"); | |
206 | fprintf(stderr, | |
207 | "\t-e or --encoding encoding of source files\n" | |
208 | "\t-d of --destdir destination directory, followed by the path, defaults to %s\n" | |
209 | "\t-s or --sourcedir source directory for files followed by path, defaults to %s\n" | |
210 | "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" | |
211 | "\t followed by path, defaults to %s\n", | |
212 | u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory()); | |
213 | fprintf(stderr, | |
214 | "\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n" | |
51004dcb A |
215 | "\t defaults to ASCII and \\uXXXX format.\n" |
216 | "\t --java-package For --write-java: package name for writing the ListResourceBundle,\n" | |
217 | "\t defaults to com.ibm.icu.impl.data\n"); | |
b75a7d8f | 218 | fprintf(stderr, |
51004dcb | 219 | "\t-b or --bundle-name For --write-java: root resource bundle name for writing the ListResourceBundle,\n" |
b75a7d8f | 220 | "\t defaults to LocaleElements\n" |
46f4442e A |
221 | "\t-x or --write-xliff write an XLIFF file for the resource bundle. Followed by\n" |
222 | "\t an optional output file name.\n" | |
b75a7d8f A |
223 | "\t-k or --strict use pedantic parsing of syntax\n" |
224 | /*added by Jing*/ | |
46f4442e A |
225 | "\t-l or --language for XLIFF: language code compliant with BCP 47.\n"); |
226 | fprintf(stderr, | |
227 | "\t-C or --noBinaryCollation do not generate binary collation image;\n" | |
228 | "\t makes .res file smaller but collator instantiation much slower;\n" | |
229 | "\t maintains ability to get tailoring rules\n" | |
230 | "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n" | |
231 | "\t makes .res file smaller and maintains collator instantiation speed\n" | |
232 | "\t but tailoring rules will not be available (they are rarely used)\n"); | |
729e4ab9 A |
233 | fprintf(stderr, |
234 | "\t --formatVersion write a .res file compatible with the requested formatVersion (single digit);\n" | |
235 | "\t for example, --formatVersion 1\n"); | |
236 | fprintf(stderr, | |
3d1f044b A |
237 | "\t --writePoolBundle [directory] write a pool.res file with all of the keys of all input bundles\n" |
238 | "\t --usePoolBundle [directory] point to keys from the pool.res keys pool bundle if they are available there;\n" | |
729e4ab9 A |
239 | "\t makes .res files smaller but dependent on the pool bundle\n" |
240 | "\t (--writePoolBundle and --usePoolBundle cannot be combined)\n"); | |
3d1f044b A |
241 | fprintf(stderr, |
242 | "\t --filterDir Input directory where filter files are available.\n" | |
243 | "\t For more on filter files, see Python buildtool.\n"); | |
b75a7d8f | 244 | |
51004dcb | 245 | return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
b75a7d8f A |
246 | } |
247 | ||
248 | if(options[VERBOSE].doesOccur) { | |
249 | setVerbose(TRUE); | |
250 | } | |
251 | ||
252 | if(options[QUIET].doesOccur) { | |
253 | setShowWarning(FALSE); | |
254 | } | |
255 | if(options[STRICT].doesOccur) { | |
256 | setStrict(TRUE); | |
257 | } | |
258 | if(options[COPYRIGHT].doesOccur){ | |
259 | setIncludeCopyright(TRUE); | |
260 | } | |
261 | ||
262 | if(options[SOURCEDIR].doesOccur) { | |
263 | inputDir = options[SOURCEDIR].value; | |
264 | } | |
265 | ||
266 | if(options[DESTDIR].doesOccur) { | |
267 | outputDir = options[DESTDIR].value; | |
268 | } | |
b75a7d8f | 269 | |
3d1f044b A |
270 | if (options[FILTERDIR].doesOccur) { |
271 | filterDir = options[FILTERDIR].value; | |
272 | } | |
273 | ||
b75a7d8f A |
274 | if(options[ENCODING].doesOccur) { |
275 | encoding = options[ENCODING].value; | |
276 | } | |
277 | ||
278 | if(options[ICUDATADIR].doesOccur) { | |
279 | u_setDataDirectory(options[ICUDATADIR].value); | |
280 | } | |
374ca955 A |
281 | /* Initialize ICU */ |
282 | u_init(&status); | |
283 | if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { | |
284 | /* Note: u_init() will try to open ICU property data. | |
285 | * failures here are expected when building ICU from scratch. | |
286 | * ignore them. | |
287 | */ | |
288 | fprintf(stderr, "%s: can not initialize ICU. status = %s\n", | |
289 | argv[0], u_errorName(status)); | |
290 | exit(1); | |
291 | } | |
292 | status = U_ZERO_ERROR; | |
b75a7d8f A |
293 | if(options[WRITE_JAVA].doesOccur) { |
294 | write_java = TRUE; | |
295 | outputEnc = options[WRITE_JAVA].value; | |
296 | } | |
297 | ||
374ca955 A |
298 | if(options[WRITE_XLIFF].doesOccur) { |
299 | write_xliff = TRUE; | |
300 | if(options[WRITE_XLIFF].value != NULL){ | |
301 | xliffOutputFileName = options[WRITE_XLIFF].value; | |
302 | } | |
b75a7d8f A |
303 | } |
304 | ||
57a6839d | 305 | initParser(); |
374ca955 | 306 | |
b75a7d8f A |
307 | /*added by Jing*/ |
308 | if(options[LANGUAGE].doesOccur) { | |
309 | language = options[LANGUAGE].value; | |
310 | } | |
311 | ||
2ca993e8 | 312 | LocalPointer<SRBRoot> newPoolBundle; |
729e4ab9 | 313 | if(options[WRITE_POOL_BUNDLE].doesOccur) { |
2ca993e8 | 314 | newPoolBundle.adoptInsteadAndCheckErrorCode(new SRBRoot(NULL, TRUE, status), status); |
729e4ab9 A |
315 | if(U_FAILURE(status)) { |
316 | fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status)); | |
317 | return status; | |
318 | } else { | |
319 | const char *poolResName = "pool.res"; | |
2ca993e8 | 320 | char *nameWithoutSuffix = static_cast<char *>(uprv_malloc(uprv_strlen(poolResName) + 1)); |
729e4ab9 A |
321 | if (nameWithoutSuffix == NULL) { |
322 | fprintf(stderr, "out of memory error\n"); | |
323 | return U_MEMORY_ALLOCATION_ERROR; | |
324 | } | |
325 | uprv_strcpy(nameWithoutSuffix, poolResName); | |
326 | *uprv_strrchr(nameWithoutSuffix, '.') = 0; | |
327 | newPoolBundle->fLocale = nameWithoutSuffix; | |
328 | } | |
329 | } | |
330 | ||
331 | if(options[USE_POOL_BUNDLE].doesOccur) { | |
332 | const char *poolResName = "pool.res"; | |
333 | FileStream *poolFile; | |
334 | int32_t poolFileSize; | |
335 | int32_t indexLength; | |
336 | /* | |
337 | * TODO: Consolidate inputDir/filename handling from main() and processFile() | |
338 | * into a common function, and use it here as well. | |
339 | * Try to create toolutil functions for dealing with dir/filenames and | |
340 | * loading ICU data files without udata_open(). | |
341 | * Share code with icupkg? | |
342 | * Also, make_res_filename() seems to be unused. Review and remove. | |
343 | */ | |
2ca993e8 | 344 | CharString poolFileName; |
729e4ab9 | 345 | if (options[USE_POOL_BUNDLE].value!=NULL) { |
2ca993e8 | 346 | poolFileName.append(options[USE_POOL_BUNDLE].value, status); |
729e4ab9 | 347 | } else if (inputDir) { |
2ca993e8 A |
348 | poolFileName.append(inputDir, status); |
349 | } | |
350 | poolFileName.appendPathPart(poolResName, status); | |
351 | if (U_FAILURE(status)) { | |
352 | return status; | |
729e4ab9 | 353 | } |
2ca993e8 | 354 | poolFile = T_FileStream_open(poolFileName.data(), "rb"); |
729e4ab9 | 355 | if (poolFile == NULL) { |
2ca993e8 | 356 | fprintf(stderr, "unable to open pool bundle file %s\n", poolFileName.data()); |
729e4ab9 A |
357 | return 1; |
358 | } | |
359 | poolFileSize = T_FileStream_size(poolFile); | |
360 | if (poolFileSize < 32) { | |
2ca993e8 | 361 | fprintf(stderr, "the pool bundle file %s is too small\n", poolFileName.data()); |
729e4ab9 A |
362 | return 1; |
363 | } | |
2ca993e8 | 364 | poolBundle.fBytes = new uint8_t[(poolFileSize + 15) & ~15]; |
729e4ab9 | 365 | if (poolFileSize > 0 && poolBundle.fBytes == NULL) { |
2ca993e8 | 366 | fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", poolFileName.data()); |
729e4ab9 | 367 | return U_MEMORY_ALLOCATION_ERROR; |
729e4ab9 | 368 | } |
2ca993e8 A |
369 | |
370 | UDataSwapper *ds; | |
371 | const DataHeader *header; | |
372 | int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize); | |
373 | if (bytesRead != poolFileSize) { | |
374 | fprintf(stderr, "unable to read the pool bundle file %s\n", poolFileName.data()); | |
375 | return 1; | |
376 | } | |
377 | /* | |
378 | * Swap the pool bundle so that a single checked-in file can be used. | |
379 | * The swapper functions also test that the data looks like | |
380 | * a well-formed .res file. | |
381 | */ | |
382 | ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead, | |
383 | U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status); | |
384 | if (U_FAILURE(status)) { | |
385 | fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n", | |
386 | poolFileName.data(), u_errorName(status)); | |
387 | return status; | |
388 | } | |
389 | ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status); | |
390 | udata_closeSwapper(ds); | |
391 | if (U_FAILURE(status)) { | |
392 | fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n", | |
393 | poolFileName.data(), u_errorName(status)); | |
394 | return status; | |
395 | } | |
396 | header = (const DataHeader *)poolBundle.fBytes; | |
397 | if (header->info.formatVersion[0] < 2) { | |
398 | fprintf(stderr, "invalid format of pool bundle file %s\n", poolFileName.data()); | |
399 | return U_INVALID_FORMAT_ERROR; | |
400 | } | |
401 | const int32_t *pRoot = (const int32_t *)( | |
402 | (const char *)header + header->dataHeader.headerSize); | |
403 | poolBundle.fIndexes = pRoot + 1; | |
404 | indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff; | |
405 | if (indexLength <= URES_INDEX_POOL_CHECKSUM) { | |
406 | fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", poolFileName.data()); | |
407 | return U_INVALID_FORMAT_ERROR; | |
408 | } | |
409 | int32_t keysBottom = 1 + indexLength; | |
410 | int32_t keysTop = poolBundle.fIndexes[URES_INDEX_KEYS_TOP]; | |
411 | poolBundle.fKeys = (const char *)(pRoot + keysBottom); | |
412 | poolBundle.fKeysLength = (keysTop - keysBottom) * 4; | |
413 | poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM]; | |
414 | ||
729e4ab9 A |
415 | for (i = 0; i < poolBundle.fKeysLength; ++i) { |
416 | if (poolBundle.fKeys[i] == 0) { | |
417 | ++poolBundle.fKeysCount; | |
418 | } | |
419 | } | |
2ca993e8 A |
420 | |
421 | // 16BitUnits[] begins with strings-v2. | |
422 | // The strings-v2 may optionally be terminated by what looks like | |
423 | // an explicit string length that exceeds the number of remaining 16-bit units. | |
424 | int32_t stringUnitsLength = (poolBundle.fIndexes[URES_INDEX_16BIT_TOP] - keysTop) * 2; | |
425 | if (stringUnitsLength >= 2 && getFormatVersion() >= 3) { | |
426 | poolBundle.fStrings = new PseudoListResource(NULL, status); | |
427 | if (poolBundle.fStrings == NULL) { | |
428 | fprintf(stderr, "unable to allocate memory for the pool bundle strings %s\n", | |
429 | poolFileName.data()); | |
430 | return U_MEMORY_ALLOCATION_ERROR; | |
431 | } | |
432 | // The PseudoListResource constructor call did not allocate further memory. | |
433 | assert(U_SUCCESS(status)); | |
434 | const UChar *p = (const UChar *)(pRoot + keysTop); | |
435 | int32_t remaining = stringUnitsLength; | |
436 | do { | |
437 | int32_t first = *p; | |
438 | int8_t numCharsForLength; | |
439 | int32_t length; | |
440 | if (!U16_IS_TRAIL(first)) { | |
441 | // NUL-terminated | |
442 | numCharsForLength = 0; | |
443 | for (length = 0; | |
444 | length < remaining && p[length] != 0; | |
445 | ++length) {} | |
446 | } else if (first < 0xdfef) { | |
447 | numCharsForLength = 1; | |
448 | length = first & 0x3ff; | |
449 | } else if (first < 0xdfff && remaining >= 2) { | |
450 | numCharsForLength = 2; | |
451 | length = ((first - 0xdfef) << 16) | p[1]; | |
452 | } else if (first == 0xdfff && remaining >= 3) { | |
453 | numCharsForLength = 3; | |
454 | length = ((int32_t)p[1] << 16) | p[2]; | |
455 | } else { | |
456 | break; // overrun | |
457 | } | |
458 | // Check for overrun before changing remaining, | |
459 | // so that it is always accurate after the loop body. | |
460 | if ((numCharsForLength + length) >= remaining || | |
461 | p[numCharsForLength + length] != 0) { | |
462 | break; // overrun or explicitly terminated | |
463 | } | |
464 | int32_t poolStringIndex = stringUnitsLength - remaining; | |
465 | // Maximum pool string index when suffix-sharing the last character. | |
466 | int32_t maxStringIndex = poolStringIndex + numCharsForLength + length - 1; | |
467 | if (maxStringIndex >= RES_MAX_OFFSET) { | |
468 | // pool string index overrun | |
469 | break; | |
470 | } | |
471 | p += numCharsForLength; | |
472 | remaining -= numCharsForLength; | |
473 | if (length != 0) { | |
474 | StringResource *sr = | |
475 | new StringResource(poolStringIndex, numCharsForLength, | |
476 | p, length, status); | |
477 | if (sr == NULL) { | |
478 | fprintf(stderr, "unable to allocate memory for a pool bundle string %s\n", | |
479 | poolFileName.data()); | |
480 | return U_MEMORY_ALLOCATION_ERROR; | |
481 | } | |
482 | poolBundle.fStrings->add(sr); | |
483 | poolBundle.fStringIndexLimit = maxStringIndex + 1; | |
484 | // The StringResource constructor did not allocate further memory. | |
485 | assert(U_SUCCESS(status)); | |
486 | } | |
487 | p += length + 1; | |
488 | remaining -= length + 1; | |
489 | } while (remaining > 0); | |
490 | if (poolBundle.fStrings->fCount == 0) { | |
491 | delete poolBundle.fStrings; | |
492 | poolBundle.fStrings = NULL; | |
493 | } | |
494 | } | |
495 | ||
729e4ab9 A |
496 | T_FileStream_close(poolFile); |
497 | setUsePoolBundle(TRUE); | |
2ca993e8 A |
498 | if (isVerbose() && poolBundle.fStrings != NULL) { |
499 | printf("number of shared strings: %d\n", (int)poolBundle.fStrings->fCount); | |
500 | int32_t length = poolBundle.fStringIndexLimit + 1; // incl. last NUL | |
501 | printf("16-bit units for strings: %6d = %6d bytes\n", | |
502 | (int)length, (int)length * 2); | |
503 | } | |
504 | } | |
505 | ||
506 | if(!options[FORMAT_VERSION].doesOccur && getFormatVersion() == 3 && | |
507 | poolBundle.fStrings == NULL && | |
508 | !options[WRITE_POOL_BUNDLE].doesOccur) { | |
509 | // If we just default to formatVersion 3 | |
510 | // but there are no pool bundle strings to share | |
511 | // and we do not write a pool bundle, | |
512 | // then write formatVersion 2 which is just as good. | |
513 | setFormatVersion(2); | |
729e4ab9 A |
514 | } |
515 | ||
516 | if(options[INCLUDE_UNIHAN_COLL].doesOccur) { | |
b331163b A |
517 | puts("genrb option --includeUnihanColl ignored: \n" |
518 | "CLDR 26/ICU 54 unihan data is small, except\n" | |
519 | "the ucadata-unihan.icu version of the collation root data\n" | |
520 | "is about 300kB larger than the ucadata-implicithan.icu version."); | |
729e4ab9 A |
521 | } |
522 | ||
523 | if((argc-1)!=1) { | |
524 | printf("genrb number of files: %d\n", argc - 1); | |
525 | } | |
b75a7d8f A |
526 | /* generate the binary files */ |
527 | for(i = 1; i < argc; ++i) { | |
528 | status = U_ZERO_ERROR; | |
529 | arg = getLongPathname(argv[i]); | |
374ca955 | 530 | |
2ca993e8 | 531 | CharString theCurrentFileName; |
b75a7d8f | 532 | if (inputDir) { |
2ca993e8 A |
533 | theCurrentFileName.append(inputDir, status); |
534 | } | |
535 | theCurrentFileName.appendPathPart(arg, status); | |
536 | if (U_FAILURE(status)) { | |
537 | break; | |
b75a7d8f | 538 | } |
b75a7d8f | 539 | |
2ca993e8 | 540 | gCurrentFileName = theCurrentFileName.data(); |
b75a7d8f | 541 | if (isVerbose()) { |
2ca993e8 | 542 | printf("Processing file \"%s\"\n", theCurrentFileName.data()); |
b75a7d8f | 543 | } |
3d1f044b | 544 | processFile(arg, encoding, inputDir, outputDir, filterDir, NULL, |
2ca993e8 A |
545 | newPoolBundle.getAlias(), |
546 | options[NO_BINARY_COLLATION].doesOccur, status); | |
b75a7d8f A |
547 | } |
548 | ||
2ca993e8 | 549 | poolBundle.close(); |
729e4ab9 | 550 | |
2ca993e8 | 551 | if(U_SUCCESS(status) && options[WRITE_POOL_BUNDLE].doesOccur) { |
3d1f044b A |
552 | const char* writePoolDir; |
553 | if (options[WRITE_POOL_BUNDLE].value!=NULL) { | |
554 | writePoolDir = options[WRITE_POOL_BUNDLE].value; | |
555 | } else { | |
556 | writePoolDir = outputDir; | |
557 | } | |
729e4ab9 | 558 | char outputFileName[256]; |
3d1f044b | 559 | newPoolBundle->write(writePoolDir, NULL, outputFileName, sizeof(outputFileName), status); |
729e4ab9 A |
560 | if(U_FAILURE(status)) { |
561 | fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status)); | |
562 | } | |
563 | } | |
564 | ||
51004dcb A |
565 | u_cleanup(); |
566 | ||
46f4442e | 567 | /* Dont return warnings as a failure */ |
729e4ab9 | 568 | if (U_SUCCESS(status)) { |
46f4442e A |
569 | return 0; |
570 | } | |
571 | ||
b75a7d8f A |
572 | return status; |
573 | } | |
574 | ||
575 | /* Process a file */ | |
729e4ab9 | 576 | void |
2ca993e8 | 577 | processFile(const char *filename, const char *cp, |
3d1f044b A |
578 | const char *inputDir, const char *outputDir, const char *filterDir, |
579 | const char *packageName, | |
2ca993e8 A |
580 | SRBRoot *newPoolBundle, |
581 | UBool omitBinaryCollation, UErrorCode &status) { | |
582 | LocalPointer<SRBRoot> data; | |
3d1f044b A |
583 | LocalUCHARBUFPointer ucbuf; |
584 | CharString openFileName; | |
585 | CharString inputDirBuf; | |
374ca955 | 586 | |
3d1f044b | 587 | char outputFileName[256]; |
b75a7d8f | 588 | int32_t dirlen = 0; |
374ca955 | 589 | |
2ca993e8 | 590 | if (U_FAILURE(status)) { |
b75a7d8f A |
591 | return; |
592 | } | |
593 | if(filename==NULL){ | |
2ca993e8 | 594 | status=U_ILLEGAL_ARGUMENT_ERROR; |
b75a7d8f | 595 | return; |
b75a7d8f | 596 | } |
4388f060 | 597 | |
b75a7d8f A |
598 | if(inputDir == NULL) { |
599 | const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); | |
b75a7d8f A |
600 | if (filenameBegin != NULL) { |
601 | /* | |
374ca955 | 602 | * When a filename ../../../data/root.txt is specified, |
b75a7d8f A |
603 | * we presume that the input directory is ../../../data |
604 | * This is very important when the resource file includes | |
605 | * another file, like UCARules.txt or thaidict.brk. | |
606 | */ | |
374ca955 | 607 | int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); |
3d1f044b | 608 | inputDirBuf.append(filename, filenameSize, status); |
b75a7d8f | 609 | |
3d1f044b A |
610 | inputDir = inputDirBuf.data(); |
611 | dirlen = inputDirBuf.length(); | |
b75a7d8f A |
612 | } |
613 | }else{ | |
614 | dirlen = (int32_t)uprv_strlen(inputDir); | |
615 | ||
616 | if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { | |
b75a7d8f | 617 | /* |
374ca955 | 618 | * append the input dir to openFileName if the first char in |
b75a7d8f A |
619 | * filename is not file seperation char and the last char input directory is not '.'. |
620 | * This is to support : | |
621 | * genrb -s. /home/icu/data | |
622 | * genrb -s. icu/data | |
623 | * The user cannot mix notations like | |
624 | * genrb -s. /icu/data --- the absolute path specified. -s redundant | |
625 | * user should use | |
626 | * genrb -s. icu/data --- start from CWD and look in icu/data dir | |
627 | */ | |
628 | if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ | |
3d1f044b | 629 | openFileName.append(inputDir, status); |
b75a7d8f | 630 | } |
b75a7d8f | 631 | } else { |
3d1f044b | 632 | openFileName.append(inputDir, status); |
b75a7d8f A |
633 | } |
634 | } | |
3d1f044b | 635 | openFileName.appendPathPart(filename, status); |
b75a7d8f | 636 | |
3d1f044b A |
637 | // Test for CharString failure |
638 | if (U_FAILURE(status)) { | |
639 | return; | |
640 | } | |
b75a7d8f | 641 | |
3d1f044b | 642 | ucbuf.adoptInstead(ucbuf_open(openFileName.data(), &cp,getShowWarning(),TRUE, &status)); |
2ca993e8 | 643 | if(status == U_FILE_ACCESS_ERROR) { |
374ca955 | 644 | |
3d1f044b A |
645 | fprintf(stderr, "couldn't open file %s\n", openFileName.data()); |
646 | return; | |
b75a7d8f | 647 | } |
3d1f044b | 648 | if (ucbuf.isNull() || U_FAILURE(status)) { |
0f5d89e8 | 649 | fprintf(stderr, "An error occurred processing file %s. Error: %s\n", |
3d1f044b A |
650 | openFileName.data(), u_errorName(status)); |
651 | return; | |
b75a7d8f A |
652 | } |
653 | /* auto detected popular encodings? */ | |
374ca955 | 654 | if (cp!=NULL && isVerbose()) { |
b75a7d8f A |
655 | printf("autodetected encoding %s\n", cp); |
656 | } | |
657 | /* Parse the data into an SRBRoot */ | |
3d1f044b | 658 | data.adoptInstead(parse(ucbuf.getAlias(), inputDir, outputDir, filename, |
2ca993e8 | 659 | !omitBinaryCollation, options[NO_COLLATION_RULES].doesOccur, &status)); |
b75a7d8f | 660 | |
2ca993e8 A |
661 | if (data.isNull() || U_FAILURE(status)) { |
662 | fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename, u_errorName(status)); | |
3d1f044b | 663 | return; |
b75a7d8f | 664 | } |
3d1f044b A |
665 | |
666 | // Run filtering before writing pool bundle | |
667 | if (filterDir != nullptr) { | |
668 | CharString filterFileName(filterDir, status); | |
669 | filterFileName.appendPathPart(filename, status); | |
670 | if (U_FAILURE(status)) { | |
671 | return; | |
672 | } | |
673 | ||
674 | // Open the file and read it into filter | |
675 | SimpleRuleBasedPathFilter filter; | |
676 | std::ifstream f(filterFileName.data()); | |
677 | if (f.fail()) { | |
678 | std::cerr << "genrb error: unable to open " << filterFileName.data() << std::endl; | |
679 | status = U_FILE_ACCESS_ERROR; | |
680 | return; | |
681 | } | |
682 | std::string currentLine; | |
683 | while (std::getline(f, currentLine)) { | |
684 | // Ignore # comments and empty lines | |
685 | if (currentLine.empty() || currentLine[0] == '#') { | |
686 | continue; | |
687 | } | |
688 | filter.addRule(currentLine, status); | |
689 | if (U_FAILURE(status)) { | |
690 | return; | |
691 | } | |
692 | } | |
693 | ||
694 | if (isVerbose()) { | |
695 | filter.print(std::cout); | |
696 | } | |
697 | ||
698 | // Apply the filter to the data | |
699 | ResKeyPath path; | |
700 | data->fRoot->applyFilter(filter, path, data.getAlias()); | |
701 | } | |
702 | ||
729e4ab9 | 703 | if(options[WRITE_POOL_BUNDLE].doesOccur) { |
2ca993e8 A |
704 | data->fWritePoolBundle = newPoolBundle; |
705 | data->compactKeys(status); | |
729e4ab9 | 706 | int32_t newKeysLength; |
2ca993e8 A |
707 | const char *newKeys = data->getKeyBytes(&newKeysLength); |
708 | newPoolBundle->addKeyBytes(newKeys, newKeysLength, status); | |
709 | if(U_FAILURE(status)) { | |
729e4ab9 | 710 | fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n", |
2ca993e8 | 711 | filename, u_errorName(status)); |
3d1f044b | 712 | return; |
729e4ab9 A |
713 | } |
714 | /* count the number of just-added key strings */ | |
2ca993e8 | 715 | for(const char *newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) { |
729e4ab9 A |
716 | if(*newKeys == 0) { |
717 | ++newPoolBundle->fKeysCount; | |
718 | } | |
719 | } | |
720 | } | |
721 | ||
722 | if(options[USE_POOL_BUNDLE].doesOccur) { | |
2ca993e8 | 723 | data->fUsePoolBundle = &poolBundle; |
729e4ab9 | 724 | } |
b75a7d8f A |
725 | |
726 | /* Determine the target rb filename */ | |
3d1f044b | 727 | uprv_free(make_res_filename(filename, outputDir, packageName, status)); |
2ca993e8 A |
728 | if(U_FAILURE(status)) { |
729 | fprintf(stderr, "couldn't make the res fileName for bundle %s. Error:%s\n", | |
730 | filename, u_errorName(status)); | |
3d1f044b | 731 | return; |
b75a7d8f A |
732 | } |
733 | if(write_java== TRUE){ | |
2ca993e8 A |
734 | bundle_write_java(data.getAlias(), outputDir, outputEnc, |
735 | outputFileName, sizeof(outputFileName), | |
736 | options[JAVA_PACKAGE].value, options[BUNDLE_NAME].value, &status); | |
374ca955 | 737 | }else if(write_xliff ==TRUE){ |
2ca993e8 A |
738 | bundle_write_xml(data.getAlias(), outputDir, outputEnc, |
739 | filename, outputFileName, sizeof(outputFileName), | |
740 | language, xliffOutputFileName, &status); | |
b75a7d8f A |
741 | }else{ |
742 | /* Write the data to the file */ | |
2ca993e8 | 743 | data->write(outputDir, packageName, outputFileName, sizeof(outputFileName), status); |
b75a7d8f | 744 | } |
2ca993e8 A |
745 | if (U_FAILURE(status)) { |
746 | fprintf(stderr, "couldn't write bundle %s. Error:%s\n", outputFileName, u_errorName(status)); | |
b75a7d8f | 747 | } |
b75a7d8f A |
748 | } |
749 | ||
750 | /* Generate the target .res file name from the input file name */ | |
751 | static char* | |
752 | make_res_filename(const char *filename, | |
753 | const char *outputDir, | |
754 | const char *packageName, | |
2ca993e8 | 755 | UErrorCode &status) { |
b75a7d8f A |
756 | char *basename; |
757 | char *dirname; | |
758 | char *resName; | |
759 | ||
760 | int32_t pkgLen = 0; /* length of package prefix */ | |
761 | ||
4388f060 | 762 | |
2ca993e8 | 763 | if (U_FAILURE(status)) { |
b75a7d8f A |
764 | return 0; |
765 | } | |
766 | ||
767 | if(packageName != NULL) | |
768 | { | |
374ca955 | 769 | pkgLen = (int32_t)(1 + uprv_strlen(packageName)); |
b75a7d8f A |
770 | } |
771 | ||
772 | /* setup */ | |
773 | basename = dirname = resName = 0; | |
774 | ||
775 | /* determine basename, and compiled file names */ | |
776 | basename = (char*) uprv_malloc(sizeof(char) * (uprv_strlen(filename) + 1)); | |
777 | if(basename == 0) { | |
2ca993e8 | 778 | status = U_MEMORY_ALLOCATION_ERROR; |
b75a7d8f A |
779 | goto finish; |
780 | } | |
781 | ||
782 | get_basename(basename, filename); | |
783 | ||
784 | dirname = (char*) uprv_malloc(sizeof(char) * (uprv_strlen(filename) + 1)); | |
785 | if(dirname == 0) { | |
2ca993e8 | 786 | status = U_MEMORY_ALLOCATION_ERROR; |
b75a7d8f A |
787 | goto finish; |
788 | } | |
789 | ||
790 | get_dirname(dirname, filename); | |
791 | ||
792 | if (outputDir == NULL) { | |
793 | /* output in same dir as .txt */ | |
794 | resName = (char*) uprv_malloc(sizeof(char) * (uprv_strlen(dirname) | |
795 | + pkgLen | |
796 | + uprv_strlen(basename) | |
797 | + uprv_strlen(RES_SUFFIX) + 8)); | |
798 | if(resName == 0) { | |
2ca993e8 | 799 | status = U_MEMORY_ALLOCATION_ERROR; |
b75a7d8f A |
800 | goto finish; |
801 | } | |
802 | ||
803 | uprv_strcpy(resName, dirname); | |
804 | ||
805 | if(packageName != NULL) | |
806 | { | |
807 | uprv_strcat(resName, packageName); | |
808 | uprv_strcat(resName, "_"); | |
809 | } | |
810 | ||
811 | uprv_strcat(resName, basename); | |
812 | ||
813 | } else { | |
814 | int32_t dirlen = (int32_t)uprv_strlen(outputDir); | |
815 | int32_t basenamelen = (int32_t)uprv_strlen(basename); | |
816 | ||
817 | resName = (char*) uprv_malloc(sizeof(char) * (dirlen + pkgLen + basenamelen + 8)); | |
818 | ||
819 | if (resName == NULL) { | |
2ca993e8 | 820 | status = U_MEMORY_ALLOCATION_ERROR; |
b75a7d8f A |
821 | goto finish; |
822 | } | |
823 | ||
824 | uprv_strcpy(resName, outputDir); | |
825 | ||
826 | if(outputDir[dirlen] != U_FILE_SEP_CHAR) { | |
827 | resName[dirlen] = U_FILE_SEP_CHAR; | |
828 | resName[dirlen + 1] = '\0'; | |
829 | } | |
830 | ||
831 | if(packageName != NULL) | |
832 | { | |
833 | uprv_strcat(resName, packageName); | |
834 | uprv_strcat(resName, "_"); | |
835 | } | |
836 | ||
837 | uprv_strcat(resName, basename); | |
838 | } | |
839 | ||
840 | finish: | |
841 | uprv_free(basename); | |
842 | uprv_free(dirname); | |
843 | ||
844 | return resName; | |
845 | } | |
846 | ||
847 | /* | |
848 | * Local Variables: | |
849 | * indent-tabs-mode: nil | |
850 | * End: | |
851 | */ |