]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
2ca993e8 A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
6 | * Copyright (C) 2000-2015, International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ******************************************************************************* | |
10 | * | |
11 | * File reslist.cpp | |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 02/21/00 weiv Creation. | |
17 | ******************************************************************************* | |
18 | */ | |
19 | ||
20 | // Safer use of UnicodeString. | |
21 | #ifndef UNISTR_FROM_CHAR_EXPLICIT | |
22 | # define UNISTR_FROM_CHAR_EXPLICIT explicit | |
23 | #endif | |
24 | ||
25 | // Less important, but still a good idea. | |
26 | #ifndef UNISTR_FROM_STRING_EXPLICIT | |
27 | # define UNISTR_FROM_STRING_EXPLICIT explicit | |
28 | #endif | |
29 | ||
30 | #include <assert.h> | |
3d1f044b A |
31 | #include <iostream> |
32 | #include <set> | |
2ca993e8 | 33 | #include <stdio.h> |
3d1f044b | 34 | |
2ca993e8 A |
35 | #include "unicode/localpointer.h" |
36 | #include "reslist.h" | |
37 | #include "unewdata.h" | |
38 | #include "unicode/ures.h" | |
39 | #include "unicode/putil.h" | |
40 | #include "errmsg.h" | |
3d1f044b | 41 | #include "filterrb.h" |
2ca993e8 A |
42 | |
43 | #include "uarrsort.h" | |
44 | #include "uelement.h" | |
45 | #include "uhash.h" | |
46 | #include "uinvchar.h" | |
47 | #include "ustr_imp.h" | |
48 | #include "unicode/utf16.h" | |
3d1f044b A |
49 | #include "uassert.h" |
50 | ||
2ca993e8 A |
51 | /* |
52 | * Align binary data at a 16-byte offset from the start of the resource bundle, | |
53 | * to be safe for any data type it may contain. | |
54 | */ | |
55 | #define BIN_ALIGNMENT 16 | |
56 | ||
57 | // This numeric constant must be at least 1. | |
58 | // If StringResource.fNumUnitsSaved == 0 then the string occurs only once, | |
59 | // and it makes no sense to move it to the pool bundle. | |
60 | // The larger the threshold for fNumUnitsSaved | |
61 | // the smaller the savings, and the smaller the pool bundle. | |
62 | // We trade some total size reduction to reduce the pool bundle a bit, | |
63 | // so that one can reasonably save data size by | |
64 | // removing bundle files without rebuilding the pool bundle. | |
65 | // This can also help to keep the pool and total (pool+local) string indexes | |
66 | // within 16 bits, that is, within range of Table16 and Array16 containers. | |
67 | #ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING | |
68 | # define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10 | |
69 | #endif | |
70 | ||
71 | U_NAMESPACE_USE | |
72 | ||
// File-scope tool options.
// gIncludeCopyright: stored by setIncludeCopyright(), returned by getIncludeCopyright().
// gUsePoolBundle: stored by setUsePoolBundle().
// gIsDefaultFormatVersion: stays TRUE until setFormatVersion() is called.
// gFormatVersion: major formatVersion to write; defaults to 3, overridden by setFormatVersion().
73 | static UBool gIncludeCopyright = FALSE; | |
74 | static UBool gUsePoolBundle = FALSE; | |
75 | static UBool gIsDefaultFormatVersion = TRUE; | |
76 | static int32_t gFormatVersion = 3; | |
77 | ||
78 | /* How do we store string values? */ | |
79 | enum { | |
80 | STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */ | |
81 | STRINGS_UTF16_V2 /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */ | |
82 | }; | |
83 | ||
84 | static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40; /* do not store the length explicitly for such strings */ | |
85 | ||
// Default-constructed ResFile.
// NOTE(review): presumably the stand-in used when no pool bundle is in effect — confirm where fUsePoolBundle is initialized.
86 | static const ResFile kNoPoolBundle; | |
87 | ||
88 | /* | |
89 | * res_none() returns the address of kNoResource, | |
90 | * for use in non-error cases when no resource is to be added to the bundle. | |
91 | * (NULL is used in error cases.) | |
92 | */ | |
93 | static SResource kNoResource; // TODO: const | |
94 | ||
// Data header descriptor for generated bundles.
// The trailing three arrays are dataFormat ("ResB"), formatVersion and dataVersion, as marked inline.
// NOTE(review): the meaning of the leading scalar entries follows the member order of UDataInfo,
// which is declared outside this file — confirm against its declaration.
95 | static UDataInfo dataInfo= { | |
96 | sizeof(UDataInfo), | |
97 | 0, | |
98 | ||
99 | U_IS_BIG_ENDIAN, | |
100 | U_CHARSET_FAMILY, | |
101 | sizeof(UChar), | |
102 | 0, | |
103 | ||
104 | {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */ | |
105 | {1, 3, 0, 0}, /* formatVersion */ | |
106 | {1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/ | |
107 | }; | |
108 | ||
// Full four-part formatVersion for each supported major version.
// Entry 0 is an all-zero placeholder so that major versions 1..3 can be used directly as the index.
109 | static const UVersionInfo gFormatVersions[4] = { /* indexed by a major-formatVersion integer */ | |
110 | { 0, 0, 0, 0 }, | |
111 | { 1, 3, 0, 0 }, | |
112 | { 2, 0, 0, 0 }, | |
113 | { 3, 0, 0, 0 } | |
114 | }; | |
115 | // Remember to update genrb.h GENRB_VERSION when changing the data format. | |
116 | // (Or maybe we should remove GENRB_VERSION and report the ICU version number?) | |
117 | ||
/*
 * Returns the number of filler bytes (0..3) that must follow `size` bytes
 * so that the total becomes a multiple of sizeof(uint32_t).
 */
static uint8_t calcPadding(uint32_t size) {
    const uint32_t remainder = size % sizeof(uint32_t);
    if (remainder == 0) {
        /* already aligned */
        return 0;
    }
    return (uint8_t)(sizeof(uint32_t) - remainder);
}
123 | ||
124 | void setIncludeCopyright(UBool val){ | |
125 | gIncludeCopyright=val; | |
126 | } | |
127 | ||
128 | UBool getIncludeCopyright(void){ | |
129 | return gIncludeCopyright; | |
130 | } | |
131 | ||
132 | void setFormatVersion(int32_t formatVersion) { | |
133 | gIsDefaultFormatVersion = FALSE; | |
134 | gFormatVersion = formatVersion; | |
135 | } | |
136 | ||
137 | int32_t getFormatVersion() { | |
138 | return gFormatVersion; | |
139 | } | |
140 | ||
141 | void setUsePoolBundle(UBool use) { | |
142 | gUsePoolBundle = use; | |
143 | } | |
144 | ||
145 | // TODO: return const pointer, or find another way to express "none" | |
146 | struct SResource* res_none() { | |
147 | return &kNoResource; | |
148 | } | |
149 | ||
150 | SResource::SResource() | |
151 | : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1), | |
152 | line(0), fNext(NULL) { | |
153 | ustr_init(&fComment); | |
154 | } | |
155 | ||
156 | SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment, | |
157 | UErrorCode &errorCode) | |
158 | : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), | |
159 | fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1), | |
160 | line(0), fNext(NULL) { | |
161 | ustr_init(&fComment); | |
162 | if(comment != NULL) { | |
163 | ustr_cpy(&fComment, comment, &errorCode); | |
164 | } | |
165 | } | |
166 | ||
167 | SResource::~SResource() { | |
168 | ustr_deinit(&fComment); | |
169 | } | |
170 | ||
171 | ContainerResource::~ContainerResource() { | |
172 | SResource *current = fFirst; | |
173 | while (current != NULL) { | |
174 | SResource *next = current->fNext; | |
175 | delete current; | |
176 | current = next; | |
177 | } | |
178 | } | |
179 | ||
180 | TableResource::~TableResource() {} | |
181 | ||
182 | // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer | |
183 | void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) { | |
184 | if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) { | |
185 | return; | |
186 | } | |
187 | ||
188 | /* remember this linenumber to report to the user if there is a duplicate key */ | |
189 | res->line = linenumber; | |
190 | ||
191 | /* here we need to traverse the list */ | |
192 | ++fCount; | |
193 | ||
194 | /* is the list still empty? */ | |
195 | if (fFirst == NULL) { | |
196 | fFirst = res; | |
197 | res->fNext = NULL; | |
198 | return; | |
199 | } | |
200 | ||
201 | const char *resKeyString = fRoot->fKeys + res->fKey; | |
202 | ||
203 | SResource *current = fFirst; | |
204 | ||
205 | SResource *prev = NULL; | |
206 | while (current != NULL) { | |
207 | const char *currentKeyString = fRoot->fKeys + current->fKey; | |
208 | int diff; | |
209 | /* | |
210 | * formatVersion 1: compare key strings in native-charset order | |
211 | * formatVersion 2 and up: compare key strings in ASCII order | |
212 | */ | |
213 | if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) { | |
214 | diff = uprv_strcmp(currentKeyString, resKeyString); | |
215 | } else { | |
216 | diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString); | |
217 | } | |
218 | if (diff < 0) { | |
219 | prev = current; | |
220 | current = current->fNext; | |
221 | } else if (diff > 0) { | |
222 | /* we're either in front of the list, or in the middle */ | |
223 | if (prev == NULL) { | |
224 | /* front of the list */ | |
225 | fFirst = res; | |
226 | } else { | |
227 | /* middle of the list */ | |
228 | prev->fNext = res; | |
229 | } | |
230 | ||
231 | res->fNext = current; | |
232 | return; | |
233 | } else { | |
234 | /* Key already exists! ERROR! */ | |
235 | error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line); | |
236 | errorCode = U_UNSUPPORTED_ERROR; | |
237 | return; | |
238 | } | |
239 | } | |
240 | ||
241 | /* end of list */ | |
242 | prev->fNext = res; | |
243 | res->fNext = NULL; | |
244 | } | |
245 | ||
246 | ArrayResource::~ArrayResource() {} | |
247 | ||
248 | void ArrayResource::add(SResource *res) { | |
249 | if (res != NULL && res != &kNoResource) { | |
250 | if (fFirst == NULL) { | |
251 | fFirst = res; | |
252 | } else { | |
253 | fLast->fNext = res; | |
254 | } | |
255 | fLast = res; | |
256 | ++fCount; | |
257 | } | |
258 | } | |
259 | ||
260 | PseudoListResource::~PseudoListResource() {} | |
261 | ||
262 | void PseudoListResource::add(SResource *res) { | |
263 | if (res != NULL && res != &kNoResource) { | |
264 | res->fNext = fFirst; | |
265 | fFirst = res; | |
266 | ++fCount; | |
267 | } | |
268 | } | |
269 | ||
270 | StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type, | |
271 | const UChar *value, int32_t len, | |
272 | const UString* comment, UErrorCode &errorCode) | |
273 | : SResource(bundle, tag, type, comment, errorCode) { | |
274 | if (len == 0 && gFormatVersion > 1) { | |
275 | fRes = URES_MAKE_EMPTY_RESOURCE(type); | |
276 | fWritten = TRUE; | |
277 | return; | |
278 | } | |
279 | ||
f3c0d7a5 | 280 | fString.setTo(ConstChar16Ptr(value), len); |
2ca993e8 A |
281 | fString.getTerminatedBuffer(); // Some code relies on NUL-termination. |
282 | if (U_SUCCESS(errorCode) && fString.isBogus()) { | |
283 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
284 | } | |
285 | } | |
286 | ||
287 | StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type, | |
288 | const icu::UnicodeString &value, UErrorCode &errorCode) | |
289 | : SResource(bundle, NULL, type, NULL, errorCode), fString(value) { | |
290 | if (value.isEmpty() && gFormatVersion > 1) { | |
291 | fRes = URES_MAKE_EMPTY_RESOURCE(type); | |
292 | fWritten = TRUE; | |
293 | return; | |
294 | } | |
295 | ||
296 | fString.getTerminatedBuffer(); // Some code relies on NUL-termination. | |
297 | if (U_SUCCESS(errorCode) && fString.isBogus()) { | |
298 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
299 | } | |
300 | } | |
301 | ||
302 | // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty. | |
303 | StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len, | |
304 | UErrorCode &errorCode) | |
305 | : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) { | |
306 | assert(len > 0); | |
307 | assert(!fString.isBogus()); | |
308 | } | |
309 | ||
310 | StringBaseResource::~StringBaseResource() {} | |
311 | ||
312 | static int32_t U_CALLCONV | |
313 | string_hash(const UElement key) { | |
314 | const StringResource *res = static_cast<const StringResource *>(key.pointer); | |
315 | return res->fString.hashCode(); | |
316 | } | |
317 | ||
318 | static UBool U_CALLCONV | |
319 | string_comp(const UElement key1, const UElement key2) { | |
320 | const StringResource *res1 = static_cast<const StringResource *>(key1.pointer); | |
321 | const StringResource *res2 = static_cast<const StringResource *>(key2.pointer); | |
322 | return res1->fString == res2->fString; | |
323 | } | |
324 | ||
325 | StringResource::~StringResource() {} | |
326 | ||
327 | AliasResource::~AliasResource() {} | |
328 | ||
329 | IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value, | |
330 | const UString* comment, UErrorCode &errorCode) | |
331 | : SResource(bundle, tag, URES_INT, comment, errorCode) { | |
332 | fValue = value; | |
333 | fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET); | |
334 | fWritten = TRUE; | |
335 | } | |
336 | ||
337 | IntResource::~IntResource() {} | |
338 | ||
339 | IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag, | |
340 | const UString* comment, UErrorCode &errorCode) | |
341 | : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode), | |
342 | fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) { | |
343 | if (fArray == NULL) { | |
344 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
345 | return; | |
346 | } | |
347 | } | |
348 | ||
349 | IntVectorResource::~IntVectorResource() { | |
350 | delete[] fArray; | |
351 | } | |
352 | ||
353 | void IntVectorResource::add(int32_t value, UErrorCode &errorCode) { | |
354 | if (U_SUCCESS(errorCode)) { | |
355 | fArray[fCount++] = value; | |
356 | } | |
357 | } | |
358 | ||
359 | BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag, | |
360 | uint32_t length, uint8_t *data, const char* fileName, | |
361 | const UString* comment, UErrorCode &errorCode) | |
362 | : SResource(bundle, tag, URES_BINARY, comment, errorCode), | |
363 | fLength(length), fData(NULL), fFileName(NULL) { | |
364 | if (U_FAILURE(errorCode)) { | |
365 | return; | |
366 | } | |
367 | if (fileName != NULL && *fileName != 0){ | |
368 | fFileName = new char[uprv_strlen(fileName)+1]; | |
369 | if (fFileName == NULL) { | |
370 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
371 | return; | |
372 | } | |
373 | uprv_strcpy(fFileName, fileName); | |
374 | } | |
375 | if (length > 0) { | |
376 | fData = new uint8_t[length]; | |
377 | if (fData == NULL) { | |
378 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
379 | return; | |
380 | } | |
381 | uprv_memcpy(fData, data, length); | |
382 | } else { | |
383 | if (gFormatVersion > 1) { | |
384 | fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY); | |
385 | fWritten = TRUE; | |
386 | } | |
387 | } | |
388 | } | |
389 | ||
390 | BinaryResource::~BinaryResource() { | |
391 | delete[] fData; | |
392 | delete[] fFileName; | |
393 | } | |
394 | ||
395 | /* Writing Functions */ | |
396 | ||
397 | void | |
398 | StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, | |
399 | UErrorCode &errorCode) { | |
400 | assert(fSame == NULL); | |
401 | fSame = static_cast<StringResource *>(uhash_get(stringSet, this)); | |
402 | if (fSame != NULL) { | |
403 | // This is a duplicate of a pool bundle string or of an earlier-visited string. | |
404 | if (++fSame->fNumCopies == 1) { | |
405 | assert(fSame->fWritten); | |
406 | int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes); | |
407 | if (poolStringIndex >= bundle->fPoolStringIndexLimit) { | |
408 | bundle->fPoolStringIndexLimit = poolStringIndex + 1; | |
409 | } | |
410 | } | |
411 | return; | |
412 | } | |
413 | /* Put this string into the set for finding duplicates. */ | |
414 | fNumCopies = 1; | |
415 | uhash_put(stringSet, this, this, &errorCode); | |
416 | ||
417 | if (bundle->fStringsForm != STRINGS_UTF16_V1) { | |
418 | int32_t len = length(); | |
419 | if (len <= MAX_IMPLICIT_STRING_LENGTH && | |
420 | !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) { | |
421 | /* | |
422 | * This string will be stored without an explicit length. | |
423 | * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen(). | |
424 | */ | |
425 | fNumCharsForLength = 0; | |
426 | } else if (len <= 0x3ee) { | |
427 | fNumCharsForLength = 1; | |
428 | } else if (len <= 0xfffff) { | |
429 | fNumCharsForLength = 2; | |
430 | } else { | |
431 | fNumCharsForLength = 3; | |
432 | } | |
433 | bundle->f16BitStringsLength += fNumCharsForLength + len + 1; /* +1 for the NUL */ | |
434 | } | |
435 | } | |
436 | ||
437 | void | |
438 | ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, | |
439 | UErrorCode &errorCode) { | |
440 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
441 | current->preflightStrings(bundle, stringSet, errorCode); | |
442 | } | |
443 | } | |
444 | ||
445 | void | |
446 | SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) { | |
447 | if (U_FAILURE(errorCode)) { | |
448 | return; | |
449 | } | |
450 | if (fRes != RES_BOGUS) { | |
451 | /* | |
452 | * The resource item word was already precomputed, which means | |
453 | * no further data needs to be written. | |
454 | * This might be an integer, or an empty string/binary/etc. | |
455 | */ | |
456 | return; | |
457 | } | |
458 | handlePreflightStrings(bundle, stringSet, errorCode); | |
459 | } | |
460 | ||
461 | void | |
462 | SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/, | |
463 | UErrorCode & /*errorCode*/) { | |
464 | /* Neither a string nor a container. */ | |
465 | } | |
466 | ||
467 | int32_t | |
468 | SRBRoot::makeRes16(uint32_t resWord) const { | |
469 | if (resWord == 0) { | |
470 | return 0; /* empty string */ | |
471 | } | |
472 | uint32_t type = RES_GET_TYPE(resWord); | |
473 | int32_t offset = (int32_t)RES_GET_OFFSET(resWord); | |
474 | if (type == URES_STRING_V2) { | |
475 | assert(offset > 0); | |
476 | if (offset < fPoolStringIndexLimit) { | |
477 | if (offset < fPoolStringIndex16Limit) { | |
478 | return offset; | |
479 | } | |
480 | } else { | |
481 | offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit; | |
482 | if (offset <= 0xffff) { | |
483 | return offset; | |
484 | } | |
485 | } | |
486 | } | |
487 | return -1; | |
488 | } | |
489 | ||
490 | int32_t | |
491 | SRBRoot::mapKey(int32_t oldpos) const { | |
492 | const KeyMapEntry *map = fKeyMap; | |
493 | if (map == NULL) { | |
494 | return oldpos; | |
495 | } | |
496 | int32_t i, start, limit; | |
497 | ||
498 | /* do a binary search for the old, pre-compactKeys() key offset */ | |
499 | start = fUsePoolBundle->fKeysCount; | |
500 | limit = start + fKeysCount; | |
501 | while (start < limit - 1) { | |
502 | i = (start + limit) / 2; | |
503 | if (oldpos < map[i].oldpos) { | |
504 | limit = i; | |
505 | } else { | |
506 | start = i; | |
507 | } | |
508 | } | |
509 | assert(oldpos == map[start].oldpos); | |
510 | return map[start].newpos; | |
511 | } | |
512 | ||
513 | /* | |
514 | * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings. | |
515 | * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS | |
516 | * and exits early. | |
517 | */ | |
518 | void | |
519 | StringResource::handleWrite16(SRBRoot * /*bundle*/) { | |
520 | SResource *same; | |
521 | if ((same = fSame) != NULL) { | |
522 | /* This is a duplicate. */ | |
523 | assert(same->fRes != RES_BOGUS && same->fWritten); | |
524 | fRes = same->fRes; | |
525 | fWritten = same->fWritten; | |
526 | } | |
527 | } | |
528 | ||
529 | void | |
530 | ContainerResource::writeAllRes16(SRBRoot *bundle) { | |
531 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
532 | bundle->f16BitUnits.append((UChar)current->fRes16); | |
533 | } | |
534 | fWritten = TRUE; | |
535 | } | |
536 | ||
537 | void | |
538 | ArrayResource::handleWrite16(SRBRoot *bundle) { | |
539 | if (fCount == 0 && gFormatVersion > 1) { | |
540 | fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY); | |
541 | fWritten = TRUE; | |
542 | return; | |
543 | } | |
544 | ||
545 | int32_t res16 = 0; | |
546 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
547 | current->write16(bundle); | |
548 | res16 |= current->fRes16; | |
549 | } | |
550 | if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) { | |
551 | fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length()); | |
552 | bundle->f16BitUnits.append((UChar)fCount); | |
553 | writeAllRes16(bundle); | |
554 | } | |
555 | } | |
556 | ||
557 | void | |
558 | TableResource::handleWrite16(SRBRoot *bundle) { | |
559 | if (fCount == 0 && gFormatVersion > 1) { | |
560 | fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); | |
561 | fWritten = TRUE; | |
562 | return; | |
563 | } | |
564 | /* Find the smallest table type that fits the data. */ | |
565 | int32_t key16 = 0; | |
566 | int32_t res16 = 0; | |
567 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
568 | current->write16(bundle); | |
569 | key16 |= current->fKey16; | |
570 | res16 |= current->fRes16; | |
571 | } | |
572 | if(fCount > (uint32_t)bundle->fMaxTableLength) { | |
573 | bundle->fMaxTableLength = fCount; | |
574 | } | |
575 | if (fCount <= 0xffff && key16 >= 0) { | |
576 | if (res16 >= 0 && gFormatVersion > 1) { | |
577 | /* 16-bit count, key offsets and values */ | |
578 | fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length()); | |
579 | bundle->f16BitUnits.append((UChar)fCount); | |
580 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
581 | bundle->f16BitUnits.append((UChar)current->fKey16); | |
582 | } | |
583 | writeAllRes16(bundle); | |
584 | } else { | |
585 | /* 16-bit count, 16-bit key offsets, 32-bit values */ | |
586 | fTableType = URES_TABLE; | |
587 | } | |
588 | } else { | |
589 | /* 32-bit count, key offsets and values */ | |
590 | fTableType = URES_TABLE32; | |
591 | } | |
592 | } | |
593 | ||
594 | void | |
595 | PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) { | |
596 | fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); | |
597 | fWritten = TRUE; | |
598 | } | |
599 | ||
600 | void | |
601 | SResource::write16(SRBRoot *bundle) { | |
602 | if (fKey >= 0) { | |
603 | // A tagged resource has a non-negative key index into the parsed key strings. | |
604 | // compactKeys() built a map from parsed key index to the final key index. | |
605 | // After the mapping, negative key indexes are used for shared pool bundle keys. | |
606 | fKey = bundle->mapKey(fKey); | |
607 | // If the key index fits into a Key16 for a Table or Table16, | |
608 | // then set the fKey16 field accordingly. | |
609 | // Otherwise keep it at -1. | |
610 | if (fKey >= 0) { | |
611 | if (fKey < bundle->fLocalKeyLimit) { | |
612 | fKey16 = fKey; | |
613 | } | |
614 | } else { | |
615 | int32_t poolKeyIndex = fKey & 0x7fffffff; | |
616 | if (poolKeyIndex <= 0xffff) { | |
617 | poolKeyIndex += bundle->fLocalKeyLimit; | |
618 | if (poolKeyIndex <= 0xffff) { | |
619 | fKey16 = poolKeyIndex; | |
620 | } | |
621 | } | |
622 | } | |
623 | } | |
624 | /* | |
625 | * fRes != RES_BOGUS: | |
626 | * The resource item word was already precomputed, which means | |
627 | * no further data needs to be written. | |
628 | * This might be an integer, or an empty or UTF-16 v2 string, | |
629 | * an empty binary, etc. | |
630 | */ | |
631 | if (fRes == RES_BOGUS) { | |
632 | handleWrite16(bundle); | |
633 | } | |
634 | // Compute fRes16 for precomputed as well as just-computed fRes. | |
635 | fRes16 = bundle->makeRes16(fRes); | |
636 | } | |
637 | ||
638 | void | |
639 | SResource::handleWrite16(SRBRoot * /*bundle*/) { | |
640 | /* Only a few resource types write 16-bit units. */ | |
641 | } | |
642 | ||
643 | /* | |
644 | * Only called for UTF-16 v1 strings, and for aliases. | |
645 | * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS | |
646 | * and exits early. | |
647 | */ | |
648 | void | |
649 | StringBaseResource::handlePreWrite(uint32_t *byteOffset) { | |
650 | /* Write the UTF-16 v1 string. */ | |
651 | fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2); | |
652 | *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR; | |
653 | } | |
654 | ||
655 | void | |
656 | IntVectorResource::handlePreWrite(uint32_t *byteOffset) { | |
657 | if (fCount == 0 && gFormatVersion > 1) { | |
658 | fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR); | |
659 | fWritten = TRUE; | |
660 | } else { | |
661 | fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2); | |
662 | *byteOffset += (1 + fCount) * 4; | |
663 | } | |
664 | } | |
665 | ||
666 | void | |
667 | BinaryResource::handlePreWrite(uint32_t *byteOffset) { | |
668 | uint32_t pad = 0; | |
669 | uint32_t dataStart = *byteOffset + sizeof(fLength); | |
670 | ||
671 | if (dataStart % BIN_ALIGNMENT) { | |
672 | pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT); | |
673 | *byteOffset += pad; /* pad == 4 or 8 or 12 */ | |
674 | } | |
675 | fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2); | |
676 | *byteOffset += 4 + fLength; | |
677 | } | |
678 | ||
679 | void | |
680 | ContainerResource::preWriteAllRes(uint32_t *byteOffset) { | |
681 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
682 | current->preWrite(byteOffset); | |
683 | } | |
684 | } | |
685 | ||
686 | void | |
687 | ArrayResource::handlePreWrite(uint32_t *byteOffset) { | |
688 | preWriteAllRes(byteOffset); | |
689 | fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2); | |
690 | *byteOffset += (1 + fCount) * 4; | |
691 | } | |
692 | ||
693 | void | |
694 | TableResource::handlePreWrite(uint32_t *byteOffset) { | |
695 | preWriteAllRes(byteOffset); | |
696 | if (fTableType == URES_TABLE) { | |
697 | /* 16-bit count, 16-bit key offsets, 32-bit values */ | |
698 | fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2); | |
699 | *byteOffset += 2 + fCount * 6; | |
700 | } else { | |
701 | /* 32-bit count, key offsets and values */ | |
702 | fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2); | |
703 | *byteOffset += 4 + fCount * 8; | |
704 | } | |
705 | } | |
706 | ||
707 | void | |
708 | SResource::preWrite(uint32_t *byteOffset) { | |
709 | if (fRes != RES_BOGUS) { | |
710 | /* | |
711 | * The resource item word was already precomputed, which means | |
712 | * no further data needs to be written. | |
713 | * This might be an integer, or an empty or UTF-16 v2 string, | |
714 | * an empty binary, etc. | |
715 | */ | |
716 | return; | |
717 | } | |
718 | handlePreWrite(byteOffset); | |
719 | *byteOffset += calcPadding(*byteOffset); | |
720 | } | |
721 | ||
722 | void | |
723 | SResource::handlePreWrite(uint32_t * /*byteOffset*/) { | |
724 | assert(FALSE); | |
725 | } | |
726 | ||
727 | /* | |
728 | * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings, | |
729 | * write() sees fWritten and exits early. | |
730 | */ | |
731 | void | |
732 | StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { | |
733 | /* Write the UTF-16 v1 string. */ | |
734 | int32_t len = length(); | |
735 | udata_write32(mem, len); | |
736 | udata_writeUString(mem, getBuffer(), len + 1); | |
737 | *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR; | |
738 | fWritten = TRUE; | |
739 | } | |
740 | ||
741 | void | |
742 | ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) { | |
743 | uint32_t i = 0; | |
744 | for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) { | |
745 | current->write(mem, byteOffset); | |
746 | } | |
747 | assert(i == fCount); | |
748 | } | |
749 | ||
750 | void | |
751 | ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) { | |
752 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
753 | udata_write32(mem, current->fRes); | |
754 | } | |
755 | *byteOffset += fCount * 4; | |
756 | } | |
757 | ||
758 | void | |
759 | ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { | |
760 | writeAllRes(mem, byteOffset); | |
761 | udata_write32(mem, fCount); | |
762 | *byteOffset += 4; | |
763 | writeAllRes32(mem, byteOffset); | |
764 | } | |
765 | ||
766 | void | |
767 | IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { | |
768 | udata_write32(mem, fCount); | |
769 | for(uint32_t i = 0; i < fCount; ++i) { | |
770 | udata_write32(mem, fArray[i]); | |
771 | } | |
772 | *byteOffset += (1 + fCount) * 4; | |
773 | } | |
774 | ||
775 | void | |
776 | BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { | |
777 | uint32_t pad = 0; | |
778 | uint32_t dataStart = *byteOffset + sizeof(fLength); | |
779 | ||
780 | if (dataStart % BIN_ALIGNMENT) { | |
781 | pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT); | |
782 | udata_writePadding(mem, pad); /* pad == 4 or 8 or 12 */ | |
783 | *byteOffset += pad; | |
784 | } | |
785 | ||
786 | udata_write32(mem, fLength); | |
787 | if (fLength > 0) { | |
788 | udata_writeBlock(mem, fData, fLength); | |
789 | } | |
790 | *byteOffset += 4 + fLength; | |
791 | } | |
792 | ||
793 | void | |
794 | TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { | |
795 | writeAllRes(mem, byteOffset); | |
796 | if(fTableType == URES_TABLE) { | |
797 | udata_write16(mem, (uint16_t)fCount); | |
798 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
799 | udata_write16(mem, current->fKey16); | |
800 | } | |
801 | *byteOffset += (1 + fCount)* 2; | |
802 | if ((fCount & 1) == 0) { | |
803 | /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */ | |
804 | udata_writePadding(mem, 2); | |
805 | *byteOffset += 2; | |
806 | } | |
807 | } else /* URES_TABLE32 */ { | |
808 | udata_write32(mem, fCount); | |
809 | for (SResource *current = fFirst; current != NULL; current = current->fNext) { | |
810 | udata_write32(mem, (uint32_t)current->fKey); | |
811 | } | |
812 | *byteOffset += (1 + fCount)* 4; | |
813 | } | |
814 | writeAllRes32(mem, byteOffset); | |
815 | } | |
816 | ||
817 | void | |
818 | SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) { | |
819 | if (fWritten) { | |
820 | assert(fRes != RES_BOGUS); | |
821 | return; | |
822 | } | |
823 | handleWrite(mem, byteOffset); | |
824 | uint8_t paddingSize = calcPadding(*byteOffset); | |
825 | if (paddingSize > 0) { | |
826 | udata_writePadding(mem, paddingSize); | |
827 | *byteOffset += paddingSize; | |
828 | } | |
829 | fWritten = TRUE; | |
830 | } | |
831 | ||
832 | void | |
833 | SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) { | |
834 | assert(FALSE); | |
835 | } | |
836 | ||
837 | void SRBRoot::write(const char *outputDir, const char *outputPkg, | |
838 | char *writtenFilename, int writtenFilenameLen, | |
839 | UErrorCode &errorCode) { | |
840 | UNewDataMemory *mem = NULL; | |
841 | uint32_t byteOffset = 0; | |
842 | uint32_t top, size; | |
843 | char dataName[1024]; | |
844 | int32_t indexes[URES_INDEX_TOP]; | |
845 | ||
846 | compactKeys(errorCode); | |
847 | /* | |
848 | * Add padding bytes to fKeys so that fKeysTop is 4-aligned. | |
849 | * Safe because the capacity is a multiple of 4. | |
850 | */ | |
851 | while (fKeysTop & 3) { | |
852 | fKeys[fKeysTop++] = (char)0xaa; | |
853 | } | |
854 | /* | |
855 | * In URES_TABLE, use all local key offsets that fit into 16 bits, | |
856 | * and use the remaining 16-bit offsets for pool key offsets | |
857 | * if there are any. | |
858 | * If there are no local keys, then use the whole 16-bit space | |
859 | * for pool key offsets. | |
860 | * Note: This cannot be changed without changing the major formatVersion. | |
861 | */ | |
862 | if (fKeysBottom < fKeysTop) { | |
863 | if (fKeysTop <= 0x10000) { | |
864 | fLocalKeyLimit = fKeysTop; | |
865 | } else { | |
866 | fLocalKeyLimit = 0x10000; | |
867 | } | |
868 | } else { | |
869 | fLocalKeyLimit = 0; | |
870 | } | |
871 | ||
872 | UHashtable *stringSet; | |
873 | if (gFormatVersion > 1) { | |
874 | stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode); | |
875 | if (U_SUCCESS(errorCode) && | |
876 | fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) { | |
877 | for (SResource *current = fUsePoolBundle->fStrings->fFirst; | |
878 | current != NULL; | |
879 | current = current->fNext) { | |
880 | StringResource *sr = static_cast<StringResource *>(current); | |
881 | sr->fNumCopies = 0; | |
882 | sr->fNumUnitsSaved = 0; | |
883 | uhash_put(stringSet, sr, sr, &errorCode); | |
884 | } | |
885 | } | |
886 | fRoot->preflightStrings(this, stringSet, errorCode); | |
887 | } else { | |
888 | stringSet = NULL; | |
889 | } | |
890 | if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) { | |
891 | compactStringsV2(stringSet, errorCode); | |
892 | } | |
893 | uhash_close(stringSet); | |
894 | if (U_FAILURE(errorCode)) { | |
895 | return; | |
896 | } | |
897 | ||
898 | int32_t formatVersion = gFormatVersion; | |
899 | if (fPoolStringIndexLimit != 0) { | |
900 | int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit; | |
901 | if ((sum - 1) > RES_MAX_OFFSET) { | |
902 | errorCode = U_BUFFER_OVERFLOW_ERROR; | |
903 | return; | |
904 | } | |
905 | if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) { | |
906 | // 16-bit indexes work for all pool + local strings. | |
907 | fPoolStringIndex16Limit = fPoolStringIndexLimit; | |
908 | } else { | |
909 | // Set the pool index threshold so that 16-bit indexes work | |
910 | // for some pool strings and some local strings. | |
911 | fPoolStringIndex16Limit = (int32_t)( | |
912 | ((int64_t)fPoolStringIndexLimit * 0xffff) / sum); | |
913 | } | |
914 | } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) { | |
915 | // If we just default to formatVersion 3 | |
916 | // but there are no pool bundle strings to share | |
917 | // and we do not write a pool bundle, | |
918 | // then write formatVersion 2 which is just as good. | |
919 | formatVersion = 2; | |
920 | } | |
921 | ||
922 | fRoot->write16(this); | |
923 | if (f16BitUnits.isBogus()) { | |
924 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
925 | return; | |
926 | } | |
927 | if (f16BitUnits.length() & 1) { | |
928 | f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */ | |
929 | } | |
2ca993e8 A |
930 | |
931 | byteOffset = fKeysTop + f16BitUnits.length() * 2; | |
932 | fRoot->preWrite(&byteOffset); | |
933 | ||
934 | /* total size including the root item */ | |
935 | top = byteOffset; | |
936 | ||
937 | if (writtenFilename && writtenFilenameLen) { | |
938 | *writtenFilename = 0; | |
939 | } | |
940 | ||
941 | if (writtenFilename) { | |
942 | int32_t off = 0, len = 0; | |
943 | if (outputDir) { | |
944 | len = (int32_t)uprv_strlen(outputDir); | |
945 | if (len > writtenFilenameLen) { | |
946 | len = writtenFilenameLen; | |
947 | } | |
948 | uprv_strncpy(writtenFilename, outputDir, len); | |
949 | } | |
950 | if (writtenFilenameLen -= len) { | |
951 | off += len; | |
952 | writtenFilename[off] = U_FILE_SEP_CHAR; | |
953 | if (--writtenFilenameLen) { | |
954 | ++off; | |
955 | if(outputPkg != NULL) | |
956 | { | |
957 | uprv_strcpy(writtenFilename+off, outputPkg); | |
958 | off += (int32_t)uprv_strlen(outputPkg); | |
959 | writtenFilename[off] = '_'; | |
960 | ++off; | |
961 | } | |
962 | ||
963 | len = (int32_t)uprv_strlen(fLocale); | |
964 | if (len > writtenFilenameLen) { | |
965 | len = writtenFilenameLen; | |
966 | } | |
967 | uprv_strncpy(writtenFilename + off, fLocale, len); | |
968 | if (writtenFilenameLen -= len) { | |
969 | off += len; | |
970 | len = 5; | |
971 | if (len > writtenFilenameLen) { | |
972 | len = writtenFilenameLen; | |
973 | } | |
974 | uprv_strncpy(writtenFilename + off, ".res", len); | |
975 | } | |
976 | } | |
977 | } | |
978 | } | |
979 | ||
980 | if(outputPkg) | |
981 | { | |
982 | uprv_strcpy(dataName, outputPkg); | |
983 | uprv_strcat(dataName, "_"); | |
984 | uprv_strcat(dataName, fLocale); | |
985 | } | |
986 | else | |
987 | { | |
988 | uprv_strcpy(dataName, fLocale); | |
989 | } | |
990 | ||
991 | uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo)); | |
992 | ||
993 | mem = udata_create(outputDir, "res", dataName, | |
994 | &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode); | |
995 | if(U_FAILURE(errorCode)){ | |
996 | return; | |
997 | } | |
998 | ||
999 | /* write the root item */ | |
1000 | udata_write32(mem, fRoot->fRes); | |
1001 | ||
1002 | /* | |
1003 | * formatVersion 1.1 (ICU 2.8): | |
1004 | * write int32_t indexes[] after root and before the key strings | |
1005 | * to make it easier to parse resource bundles in icuswap or from Java etc. | |
1006 | */ | |
1007 | uprv_memset(indexes, 0, sizeof(indexes)); | |
1008 | indexes[URES_INDEX_LENGTH]= fIndexLength; | |
1009 | indexes[URES_INDEX_KEYS_TOP]= fKeysTop>>2; | |
1010 | indexes[URES_INDEX_RESOURCES_TOP]= (int32_t)(top>>2); | |
1011 | indexes[URES_INDEX_BUNDLE_TOP]= indexes[URES_INDEX_RESOURCES_TOP]; | |
1012 | indexes[URES_INDEX_MAX_TABLE_LENGTH]= fMaxTableLength; | |
1013 | ||
1014 | /* | |
1015 | * formatVersion 1.2 (ICU 3.6): | |
1016 | * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set | |
1017 | * the memset() above initialized all indexes[] to 0 | |
1018 | */ | |
1019 | if (fNoFallback) { | |
1020 | indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK; | |
1021 | } | |
1022 | /* | |
1023 | * formatVersion 2.0 (ICU 4.4): | |
1024 | * more compact string value storage, optional pool bundle | |
1025 | */ | |
1026 | if (URES_INDEX_16BIT_TOP < fIndexLength) { | |
1027 | indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1); | |
1028 | } | |
1029 | if (URES_INDEX_POOL_CHECKSUM < fIndexLength) { | |
1030 | if (fIsPoolBundle) { | |
1031 | indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK; | |
1032 | uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom), | |
1033 | (uint32_t)(fKeysTop - fKeysBottom), 0); | |
1034 | if (f16BitUnits.length() <= 1) { | |
1035 | // no pool strings to checksum | |
1036 | } else if (U_IS_BIG_ENDIAN) { | |
f3c0d7a5 | 1037 | checksum = computeCRC(reinterpret_cast<const char *>(f16BitUnits.getBuffer()), |
2ca993e8 A |
1038 | (uint32_t)f16BitUnits.length() * 2, checksum); |
1039 | } else { | |
1040 | // Swap to big-endian so we get the same checksum on all platforms | |
1041 | // (except for charset family, due to the key strings). | |
1042 | UnicodeString s(f16BitUnits); | |
2ca993e8 | 1043 | assert(!s.isBogus()); |
3d1f044b A |
1044 | // .getBuffer(capacity) returns a mutable buffer |
1045 | char16_t* p = s.getBuffer(f16BitUnits.length()); | |
2ca993e8 A |
1046 | for (int32_t count = f16BitUnits.length(); count > 0; --count) { |
1047 | uint16_t x = *p; | |
1048 | *p++ = (uint16_t)((x << 8) | (x >> 8)); | |
1049 | } | |
3d1f044b A |
1050 | s.releaseBuffer(f16BitUnits.length()); |
1051 | checksum = computeCRC((const char *)s.getBuffer(), | |
2ca993e8 A |
1052 | (uint32_t)f16BitUnits.length() * 2, checksum); |
1053 | } | |
1054 | indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum; | |
1055 | } else if (gUsePoolBundle) { | |
1056 | indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE; | |
1057 | indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum; | |
1058 | } | |
1059 | } | |
1060 | // formatVersion 3 (ICU 56): | |
1061 | // share string values via pool bundle strings | |
1062 | indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8; // bits 23..0 -> 31..8 | |
1063 | indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000; // bits 27..24 -> 15..12 | |
1064 | indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16; | |
1065 | ||
1066 | /* write the indexes[] */ | |
1067 | udata_writeBlock(mem, indexes, fIndexLength*4); | |
1068 | ||
1069 | /* write the table key strings */ | |
1070 | udata_writeBlock(mem, fKeys+fKeysBottom, | |
1071 | fKeysTop-fKeysBottom); | |
1072 | ||
1073 | /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */ | |
1074 | udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2); | |
1075 | ||
1076 | /* write all of the bundle contents: the root item and its children */ | |
1077 | byteOffset = fKeysTop + f16BitUnits.length() * 2; | |
1078 | fRoot->write(mem, &byteOffset); | |
1079 | assert(byteOffset == top); | |
1080 | ||
1081 | size = udata_finish(mem, &errorCode); | |
1082 | if(top != size) { | |
1083 | fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n", | |
1084 | (int)size, (int)top); | |
1085 | errorCode = U_INTERNAL_PROGRAM_ERROR; | |
1086 | } | |
1087 | } | |
1088 | ||
1089 | /* Opening Functions */ | |
1090 | ||
1091 | TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { | |
1092 | LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status); | |
1093 | return U_SUCCESS(*status) ? res.orphan() : NULL; | |
1094 | } | |
1095 | ||
1096 | ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { | |
1097 | LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status); | |
1098 | return U_SUCCESS(*status) ? res.orphan() : NULL; | |
1099 | } | |
1100 | ||
1101 | struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { | |
1102 | LocalPointer<SResource> res( | |
1103 | new StringResource(bundle, tag, value, len, comment, *status), *status); | |
1104 | return U_SUCCESS(*status) ? res.orphan() : NULL; | |
1105 | } | |
1106 | ||
1107 | struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { | |
1108 | LocalPointer<SResource> res( | |
1109 | new AliasResource(bundle, tag, value, len, comment, *status), *status); | |
1110 | return U_SUCCESS(*status) ? res.orphan() : NULL; | |
1111 | } | |
1112 | ||
1113 | IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { | |
1114 | LocalPointer<IntVectorResource> res( | |
1115 | new IntVectorResource(bundle, tag, comment, *status), *status); | |
1116 | return U_SUCCESS(*status) ? res.orphan() : NULL; | |
1117 | } | |
1118 | ||
1119 | struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) { | |
1120 | LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status); | |
1121 | return U_SUCCESS(*status) ? res.orphan() : NULL; | |
1122 | } | |
1123 | ||
1124 | struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) { | |
1125 | LocalPointer<SResource> res( | |
1126 | new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status); | |
1127 | return U_SUCCESS(*status) ? res.orphan() : NULL; | |
1128 | } | |
1129 | ||
1130 | SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode) | |
1131 | : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE), | |
1132 | fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle), | |
1133 | fKeys(NULL), fKeyMap(NULL), | |
3d1f044b A |
1134 | fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), |
1135 | fKeysCount(0), fLocalKeyLimit(0), | |
2ca993e8 A |
1136 | f16BitUnits(), f16BitStringsLength(0), |
1137 | fUsePoolBundle(&kNoPoolBundle), | |
1138 | fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0), | |
1139 | fWritePoolBundle(NULL) { | |
1140 | if (U_FAILURE(errorCode)) { | |
1141 | return; | |
1142 | } | |
1143 | ||
1144 | if (gFormatVersion > 1) { | |
1145 | // f16BitUnits must start with a zero for empty resources. | |
1146 | // We might be able to omit it if there are no empty 16-bit resources. | |
1147 | f16BitUnits.append((UChar)0); | |
1148 | } | |
1149 | ||
1150 | fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE); | |
1151 | if (isPoolBundle) { | |
1152 | fRoot = new PseudoListResource(this, errorCode); | |
1153 | } else { | |
1154 | fRoot = new TableResource(this, NULL, comment, errorCode); | |
1155 | } | |
1156 | if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) { | |
1157 | if (U_SUCCESS(errorCode)) { | |
1158 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1159 | } | |
1160 | return; | |
1161 | } | |
1162 | ||
1163 | fKeysCapacity = KEY_SPACE_SIZE; | |
1164 | /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */ | |
1165 | if (gUsePoolBundle || isPoolBundle) { | |
1166 | fIndexLength = URES_INDEX_POOL_CHECKSUM + 1; | |
1167 | } else if (gFormatVersion >= 2) { | |
1168 | fIndexLength = URES_INDEX_16BIT_TOP + 1; | |
1169 | } else /* formatVersion 1 */ { | |
1170 | fIndexLength = URES_INDEX_ATTRIBUTES + 1; | |
1171 | } | |
1172 | fKeysBottom = (1 /* root */ + fIndexLength) * 4; | |
1173 | uprv_memset(fKeys, 0, fKeysBottom); | |
1174 | fKeysTop = fKeysBottom; | |
1175 | ||
1176 | if (gFormatVersion == 1) { | |
1177 | fStringsForm = STRINGS_UTF16_V1; | |
1178 | } else { | |
1179 | fStringsForm = STRINGS_UTF16_V2; | |
1180 | } | |
1181 | } | |
1182 | ||
1183 | /* Closing Functions */ | |
1184 | ||
/* Destroys a resource object with `delete`; a NULL pointer is a no-op. */
void res_close(struct SResource *res) {
    delete res;
}
1188 | ||
1189 | SRBRoot::~SRBRoot() { | |
1190 | delete fRoot; | |
1191 | uprv_free(fLocale); | |
1192 | uprv_free(fKeys); | |
1193 | uprv_free(fKeyMap); | |
1194 | } | |
1195 | ||
1196 | /* Misc Functions */ | |
1197 | ||
1198 | void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) { | |
1199 | if(U_FAILURE(errorCode)) { | |
1200 | return; | |
1201 | } | |
1202 | ||
1203 | uprv_free(fLocale); | |
1204 | fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1)); | |
1205 | if(fLocale == NULL) { | |
1206 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1207 | return; | |
1208 | } | |
1209 | ||
1210 | u_UCharsToChars(locale, fLocale, u_strlen(locale)+1); | |
1211 | } | |
1212 | ||
1213 | const char * | |
1214 | SRBRoot::getKeyString(int32_t key) const { | |
1215 | if (key < 0) { | |
1216 | return fUsePoolBundle->fKeys + (key & 0x7fffffff); | |
1217 | } else { | |
1218 | return fKeys + key; | |
1219 | } | |
1220 | } | |
1221 | ||
1222 | const char * | |
1223 | SResource::getKeyString(const SRBRoot *bundle) const { | |
1224 | if (fKey == -1) { | |
1225 | return NULL; | |
1226 | } | |
1227 | return bundle->getKeyString(fKey); | |
1228 | } | |
1229 | ||
1230 | const char * | |
1231 | SRBRoot::getKeyBytes(int32_t *pLength) const { | |
1232 | *pLength = fKeysTop - fKeysBottom; | |
1233 | return fKeys + fKeysBottom; | |
1234 | } | |
1235 | ||
1236 | int32_t | |
1237 | SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) { | |
1238 | int32_t keypos; | |
1239 | ||
3d1f044b A |
1240 | // It is not legal to add new key bytes after compactKeys is run! |
1241 | U_ASSERT(fKeyMap == nullptr); | |
1242 | ||
2ca993e8 A |
1243 | if (U_FAILURE(errorCode)) { |
1244 | return -1; | |
1245 | } | |
1246 | if (length < 0 || (keyBytes == NULL && length != 0)) { | |
1247 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
1248 | return -1; | |
1249 | } | |
1250 | if (length == 0) { | |
1251 | return fKeysTop; | |
1252 | } | |
1253 | ||
1254 | keypos = fKeysTop; | |
1255 | fKeysTop += length; | |
1256 | if (fKeysTop >= fKeysCapacity) { | |
1257 | /* overflow - resize the keys buffer */ | |
1258 | fKeysCapacity += KEY_SPACE_SIZE; | |
1259 | fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity)); | |
1260 | if(fKeys == NULL) { | |
1261 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1262 | return -1; | |
1263 | } | |
1264 | } | |
1265 | ||
1266 | uprv_memcpy(fKeys + keypos, keyBytes, length); | |
1267 | ||
1268 | return keypos; | |
1269 | } | |
1270 | ||
1271 | int32_t | |
1272 | SRBRoot::addTag(const char *tag, UErrorCode &errorCode) { | |
1273 | int32_t keypos; | |
1274 | ||
1275 | if (U_FAILURE(errorCode)) { | |
1276 | return -1; | |
1277 | } | |
1278 | ||
1279 | if (tag == NULL) { | |
1280 | /* no error: the root table and array items have no keys */ | |
1281 | return -1; | |
1282 | } | |
1283 | ||
1284 | keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode); | |
1285 | if (U_SUCCESS(errorCode)) { | |
1286 | ++fKeysCount; | |
1287 | } | |
1288 | return keypos; | |
1289 | } | |
1290 | ||
/*
 * Three-way comparison of possibly-negative key offsets:
 * returns -1, 0 or 1 for lPos <, ==, > rPos.
 * Deliberately not written as lPos - rPos, which could overflow.
 */
static int32_t
compareInt32(int32_t lPos, int32_t rPos) {
    return (int32_t)(lPos > rPos) - (int32_t)(lPos < rPos);
}
1305 | ||
1306 | static int32_t U_CALLCONV | |
1307 | compareKeySuffixes(const void *context, const void *l, const void *r) { | |
1308 | const struct SRBRoot *bundle=(const struct SRBRoot *)context; | |
1309 | int32_t lPos = ((const KeyMapEntry *)l)->oldpos; | |
1310 | int32_t rPos = ((const KeyMapEntry *)r)->oldpos; | |
1311 | const char *lStart = bundle->getKeyString(lPos); | |
1312 | const char *lLimit = lStart; | |
1313 | const char *rStart = bundle->getKeyString(rPos); | |
1314 | const char *rLimit = rStart; | |
1315 | int32_t diff; | |
1316 | while (*lLimit != 0) { ++lLimit; } | |
1317 | while (*rLimit != 0) { ++rLimit; } | |
1318 | /* compare keys in reverse character order */ | |
1319 | while (lStart < lLimit && rStart < rLimit) { | |
1320 | diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit; | |
1321 | if (diff != 0) { | |
1322 | return diff; | |
1323 | } | |
1324 | } | |
1325 | /* sort equal suffixes by descending key length */ | |
1326 | diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart); | |
1327 | if (diff != 0) { | |
1328 | return diff; | |
1329 | } | |
1330 | /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */ | |
1331 | return compareInt32(lPos, rPos); | |
1332 | } | |
1333 | ||
1334 | static int32_t U_CALLCONV | |
1335 | compareKeyNewpos(const void * /*context*/, const void *l, const void *r) { | |
1336 | return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos); | |
1337 | } | |
1338 | ||
1339 | static int32_t U_CALLCONV | |
1340 | compareKeyOldpos(const void * /*context*/, const void *l, const void *r) { | |
1341 | return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos); | |
1342 | } | |
1343 | ||
3d1f044b A |
1344 | void SResource::collectKeys(std::function<void(int32_t)> collector) const { |
1345 | collector(fKey); | |
1346 | } | |
1347 | ||
1348 | void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const { | |
1349 | collector(fKey); | |
1350 | for (SResource* curr = fFirst; curr != NULL; curr = curr->fNext) { | |
1351 | curr->collectKeys(collector); | |
1352 | } | |
1353 | } | |
1354 | ||
2ca993e8 A |
1355 | void |
1356 | SRBRoot::compactKeys(UErrorCode &errorCode) { | |
1357 | KeyMapEntry *map; | |
1358 | char *keys; | |
1359 | int32_t i; | |
3d1f044b A |
1360 | |
1361 | // Except for pool bundles, keys might not be used. | |
1362 | // Do not add unused keys to the final bundle. | |
1363 | std::set<int32_t> keysInUse; | |
1364 | if (!fIsPoolBundle) { | |
1365 | fRoot->collectKeys([&keysInUse](int32_t key) { | |
1366 | if (key >= 0) { | |
1367 | keysInUse.insert(key); | |
1368 | } | |
1369 | }); | |
1370 | fKeysCount = static_cast<int32_t>(keysInUse.size()); | |
1371 | } | |
1372 | ||
2ca993e8 A |
1373 | int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount; |
1374 | if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) { | |
1375 | return; | |
1376 | } | |
1377 | map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry)); | |
1378 | if (map == NULL) { | |
1379 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1380 | return; | |
1381 | } | |
1382 | keys = (char *)fUsePoolBundle->fKeys; | |
1383 | for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) { | |
1384 | map[i].oldpos = | |
1385 | (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000; /* negative oldpos */ | |
1386 | map[i].newpos = 0; | |
1387 | while (*keys != 0) { ++keys; } /* skip the key */ | |
1388 | ++keys; /* skip the NUL */ | |
1389 | } | |
1390 | keys = fKeys + fKeysBottom; | |
3d1f044b A |
1391 | while (i < keysCount) { |
1392 | int32_t keyOffset = static_cast<int32_t>(keys - fKeys); | |
1393 | if (!fIsPoolBundle && keysInUse.count(keyOffset) == 0) { | |
1394 | // Mark the unused key as deleted | |
1395 | while (*keys != 0) { *keys++ = 1; } | |
1396 | *keys++ = 1; | |
1397 | } else { | |
1398 | map[i].oldpos = keyOffset; | |
1399 | map[i].newpos = 0; | |
1400 | while (*keys != 0) { ++keys; } /* skip the key */ | |
1401 | ++keys; /* skip the NUL */ | |
1402 | i++; | |
1403 | } | |
1404 | } | |
1405 | if (keys != fKeys + fKeysTop) { | |
1406 | // Throw away any unused keys from the end | |
1407 | fKeysTop = static_cast<int32_t>(keys - fKeys); | |
2ca993e8 A |
1408 | } |
1409 | /* Sort the keys so that each one is immediately followed by all of its suffixes. */ | |
1410 | uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), | |
1411 | compareKeySuffixes, this, FALSE, &errorCode); | |
1412 | /* | |
1413 | * Make suffixes point into earlier, longer strings that contain them | |
1414 | * and mark the old, now unused suffix bytes as deleted. | |
1415 | */ | |
1416 | if (U_SUCCESS(errorCode)) { | |
1417 | keys = fKeys; | |
1418 | for (i = 0; i < keysCount;) { | |
1419 | /* | |
1420 | * This key is not a suffix of the previous one; | |
1421 | * keep this one and delete the following ones that are | |
1422 | * suffixes of this one. | |
1423 | */ | |
1424 | const char *key; | |
1425 | const char *keyLimit; | |
1426 | int32_t j = i + 1; | |
1427 | map[i].newpos = map[i].oldpos; | |
1428 | if (j < keysCount && map[j].oldpos < 0) { | |
1429 | /* Key string from the pool bundle, do not delete. */ | |
1430 | i = j; | |
1431 | continue; | |
1432 | } | |
1433 | key = getKeyString(map[i].oldpos); | |
1434 | for (keyLimit = key; *keyLimit != 0; ++keyLimit) {} | |
1435 | for (; j < keysCount && map[j].oldpos >= 0; ++j) { | |
1436 | const char *k; | |
1437 | char *suffix; | |
1438 | const char *suffixLimit; | |
1439 | int32_t offset; | |
1440 | suffix = keys + map[j].oldpos; | |
1441 | for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {} | |
3d1f044b | 1442 | offset = static_cast<int32_t>((keyLimit - key) - (suffixLimit - suffix)); |
2ca993e8 A |
1443 | if (offset < 0) { |
1444 | break; /* suffix cannot be longer than the original */ | |
1445 | } | |
1446 | /* Is it a suffix of the earlier, longer key? */ | |
1447 | for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {} | |
1448 | if (suffix == suffixLimit && *k == *suffixLimit) { | |
1449 | map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */ | |
3d1f044b | 1450 | // Mark the suffix as deleted |
2ca993e8 A |
1451 | while (*suffix != 0) { *suffix++ = 1; } |
1452 | *suffix = 1; | |
1453 | } else { | |
1454 | break; /* not a suffix, restart from here */ | |
1455 | } | |
1456 | } | |
1457 | i = j; | |
1458 | } | |
1459 | /* | |
1460 | * Re-sort by newpos, then modify the key characters array in-place | |
1461 | * to squeeze out unused bytes, and readjust the newpos offsets. | |
1462 | */ | |
1463 | uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), | |
1464 | compareKeyNewpos, NULL, FALSE, &errorCode); | |
1465 | if (U_SUCCESS(errorCode)) { | |
1466 | int32_t oldpos, newpos, limit; | |
1467 | oldpos = newpos = fKeysBottom; | |
1468 | limit = fKeysTop; | |
1469 | /* skip key offsets that point into the pool bundle rather than this new bundle */ | |
1470 | for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {} | |
1471 | if (i < keysCount) { | |
1472 | while (oldpos < limit) { | |
1473 | if (keys[oldpos] == 1) { | |
1474 | ++oldpos; /* skip unused bytes */ | |
1475 | } else { | |
1476 | /* adjust the new offsets for keys starting here */ | |
1477 | while (i < keysCount && map[i].newpos == oldpos) { | |
1478 | map[i++].newpos = newpos; | |
1479 | } | |
1480 | /* move the key characters to their new position */ | |
1481 | keys[newpos++] = keys[oldpos++]; | |
1482 | } | |
1483 | } | |
3d1f044b | 1484 | U_ASSERT(i == keysCount); |
2ca993e8 A |
1485 | } |
1486 | fKeysTop = newpos; | |
1487 | /* Re-sort once more, by old offsets for binary searching. */ | |
1488 | uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), | |
1489 | compareKeyOldpos, NULL, FALSE, &errorCode); | |
1490 | if (U_SUCCESS(errorCode)) { | |
1491 | /* key size reduction by limit - newpos */ | |
1492 | fKeyMap = map; | |
1493 | map = NULL; | |
1494 | } | |
1495 | } | |
1496 | } | |
1497 | uprv_free(map); | |
1498 | } | |
1499 | ||
1500 | static int32_t U_CALLCONV | |
1501 | compareStringSuffixes(const void * /*context*/, const void *l, const void *r) { | |
1502 | const StringResource *left = *((const StringResource **)l); | |
1503 | const StringResource *right = *((const StringResource **)r); | |
1504 | const UChar *lStart = left->getBuffer(); | |
1505 | const UChar *lLimit = lStart + left->length(); | |
1506 | const UChar *rStart = right->getBuffer(); | |
1507 | const UChar *rLimit = rStart + right->length(); | |
1508 | int32_t diff; | |
1509 | /* compare keys in reverse character order */ | |
1510 | while (lStart < lLimit && rStart < rLimit) { | |
1511 | diff = (int32_t)*--lLimit - (int32_t)*--rLimit; | |
1512 | if (diff != 0) { | |
1513 | return diff; | |
1514 | } | |
1515 | } | |
1516 | /* sort equal suffixes by descending string length */ | |
1517 | return right->length() - left->length(); | |
1518 | } | |
1519 | ||
1520 | static int32_t U_CALLCONV | |
1521 | compareStringLengths(const void * /*context*/, const void *l, const void *r) { | |
1522 | const StringResource *left = *((const StringResource **)l); | |
1523 | const StringResource *right = *((const StringResource **)r); | |
1524 | int32_t diff; | |
1525 | /* Make "is suffix of another string" compare greater than a non-suffix. */ | |
1526 | diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL); | |
1527 | if (diff != 0) { | |
1528 | return diff; | |
1529 | } | |
1530 | /* sort by ascending string length */ | |
1531 | diff = left->length() - right->length(); | |
1532 | if (diff != 0) { | |
1533 | return diff; | |
1534 | } | |
1535 | // sort by descending size reduction | |
1536 | diff = right->fNumUnitsSaved - left->fNumUnitsSaved; | |
1537 | if (diff != 0) { | |
1538 | return diff; | |
1539 | } | |
1540 | // sort lexically | |
1541 | return left->fString.compare(right->fString); | |
1542 | } | |
1543 | ||
1544 | void | |
1545 | StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) { | |
1546 | int32_t len = length(); | |
1547 | fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length()); | |
1548 | fWritten = TRUE; | |
1549 | switch(fNumCharsForLength) { | |
1550 | case 0: | |
1551 | break; | |
1552 | case 1: | |
1553 | dest.append((UChar)(0xdc00 + len)); | |
1554 | break; | |
1555 | case 2: | |
1556 | dest.append((UChar)(0xdfef + (len >> 16))); | |
1557 | dest.append((UChar)len); | |
1558 | break; | |
1559 | case 3: | |
1560 | dest.append((UChar)0xdfff); | |
1561 | dest.append((UChar)(len >> 16)); | |
1562 | dest.append((UChar)len); | |
1563 | break; | |
1564 | default: | |
1565 | break; /* will not occur */ | |
1566 | } | |
1567 | dest.append(fString); | |
1568 | dest.append((UChar)0); | |
1569 | } | |
1570 | ||
1571 | void | |
1572 | SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) { | |
1573 | if (U_FAILURE(errorCode)) { | |
1574 | return; | |
1575 | } | |
1576 | // Store the StringResource pointers in an array for | |
1577 | // easy sorting and processing. | |
1578 | // We enumerate a set of strings, so there are no duplicates. | |
1579 | int32_t count = uhash_count(stringSet); | |
1580 | LocalArray<StringResource *> array(new StringResource *[count], errorCode); | |
1581 | if (U_FAILURE(errorCode)) { | |
1582 | return; | |
1583 | } | |
1584 | for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) { | |
1585 | array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer; | |
1586 | } | |
1587 | /* Sort the strings so that each one is immediately followed by all of its suffixes. */ | |
1588 | uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **), | |
1589 | compareStringSuffixes, NULL, FALSE, &errorCode); | |
1590 | if (U_FAILURE(errorCode)) { | |
1591 | return; | |
1592 | } | |
1593 | /* | |
1594 | * Make suffixes point into earlier, longer strings that contain them. | |
1595 | * Temporarily use fSame and fSuffixOffset for suffix strings to | |
1596 | * refer to the remaining ones. | |
1597 | */ | |
1598 | for (int32_t i = 0; i < count;) { | |
1599 | /* | |
1600 | * This string is not a suffix of the previous one; | |
1601 | * write this one and subsume the following ones that are | |
1602 | * suffixes of this one. | |
1603 | */ | |
1604 | StringResource *res = array[i]; | |
1605 | res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength(); | |
1606 | // Whole duplicates of pool strings are already account for in fPoolStringIndexLimit, | |
1607 | // see StringResource::handlePreflightStrings(). | |
1608 | int32_t j; | |
1609 | for (j = i + 1; j < count; ++j) { | |
1610 | StringResource *suffixRes = array[j]; | |
1611 | /* Is it a suffix of the earlier, longer string? */ | |
1612 | if (res->fString.endsWith(suffixRes->fString)) { | |
1613 | assert(res->length() != suffixRes->length()); // Set strings are unique. | |
1614 | if (suffixRes->fWritten) { | |
1615 | // Pool string, skip. | |
1616 | } else if (suffixRes->fNumCharsForLength == 0) { | |
1617 | /* yes, point to the earlier string */ | |
1618 | suffixRes->fSame = res; | |
1619 | suffixRes->fSuffixOffset = res->length() - suffixRes->length(); | |
1620 | if (res->fWritten) { | |
1621 | // Suffix-share res which is a pool string. | |
1622 | // Compute the resource word and collect the maximum. | |
1623 | suffixRes->fRes = | |
1624 | res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset; | |
1625 | int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes); | |
1626 | if (poolStringIndex >= fPoolStringIndexLimit) { | |
1627 | fPoolStringIndexLimit = poolStringIndex + 1; | |
1628 | } | |
1629 | suffixRes->fWritten = TRUE; | |
1630 | } | |
1631 | res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength(); | |
1632 | } else { | |
1633 | /* write the suffix by itself if we need explicit length */ | |
1634 | } | |
1635 | } else { | |
1636 | break; /* not a suffix, restart from here */ | |
1637 | } | |
1638 | } | |
1639 | i = j; | |
1640 | } | |
1641 | /* | |
1642 | * Re-sort the strings by ascending length (except suffixes last) | |
1643 | * to optimize for URES_TABLE16 and URES_ARRAY16: | |
1644 | * Keep as many as possible within reach of 16-bit offsets. | |
1645 | */ | |
1646 | uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **), | |
1647 | compareStringLengths, NULL, FALSE, &errorCode); | |
1648 | if (U_FAILURE(errorCode)) { | |
1649 | return; | |
1650 | } | |
1651 | if (fIsPoolBundle) { | |
1652 | // Write strings that are sufficiently shared. | |
1653 | // Avoid writing other strings. | |
1654 | int32_t numStringsWritten = 0; | |
1655 | int32_t numUnitsSaved = 0; | |
1656 | int32_t numUnitsNotSaved = 0; | |
1657 | for (int32_t i = 0; i < count; ++i) { | |
1658 | StringResource *res = array[i]; | |
1659 | // Maximum pool string index when suffix-sharing the last character. | |
1660 | int32_t maxStringIndex = | |
1661 | f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1; | |
1662 | if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING && | |
1663 | maxStringIndex < RES_MAX_OFFSET) { | |
1664 | res->writeUTF16v2(0, f16BitUnits); | |
1665 | ++numStringsWritten; | |
1666 | numUnitsSaved += res->fNumUnitsSaved; | |
1667 | } else { | |
1668 | numUnitsNotSaved += res->fNumUnitsSaved; | |
1669 | res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING); | |
1670 | res->fWritten = TRUE; | |
1671 | } | |
1672 | } | |
1673 | if (f16BitUnits.isBogus()) { | |
1674 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1675 | } | |
1676 | if (getShowWarning()) { // not quiet | |
1677 | printf("number of shared strings: %d\n", (int)numStringsWritten); | |
1678 | printf("16-bit units for strings: %6d = %6d bytes\n", | |
1679 | (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2); | |
1680 | printf("16-bit units saved: %6d = %6d bytes\n", | |
1681 | (int)numUnitsSaved, (int)numUnitsSaved * 2); | |
1682 | printf("16-bit units not saved: %6d = %6d bytes\n", | |
1683 | (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2); | |
1684 | } | |
1685 | } else { | |
1686 | assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit); | |
1687 | /* Write the non-suffix strings. */ | |
1688 | int32_t i; | |
1689 | for (i = 0; i < count && array[i]->fSame == NULL; ++i) { | |
1690 | StringResource *res = array[i]; | |
1691 | if (!res->fWritten) { | |
1692 | int32_t localStringIndex = f16BitUnits.length(); | |
1693 | if (localStringIndex >= fLocalStringIndexLimit) { | |
1694 | fLocalStringIndexLimit = localStringIndex + 1; | |
1695 | } | |
1696 | res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits); | |
1697 | } | |
1698 | } | |
1699 | if (f16BitUnits.isBogus()) { | |
1700 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1701 | return; | |
1702 | } | |
1703 | if (fWritePoolBundle != NULL && gFormatVersion >= 3) { | |
1704 | PseudoListResource *poolStrings = | |
1705 | static_cast<PseudoListResource *>(fWritePoolBundle->fRoot); | |
1706 | for (i = 0; i < count && array[i]->fSame == NULL; ++i) { | |
1707 | assert(!array[i]->fString.isEmpty()); | |
1708 | StringResource *poolString = | |
1709 | new StringResource(fWritePoolBundle, array[i]->fString, errorCode); | |
1710 | if (poolString == NULL) { | |
1711 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
1712 | break; | |
1713 | } | |
1714 | poolStrings->add(poolString); | |
1715 | } | |
1716 | } | |
1717 | /* Write the suffix strings. Make each point to the real string. */ | |
1718 | for (; i < count; ++i) { | |
1719 | StringResource *res = array[i]; | |
1720 | if (res->fWritten) { | |
1721 | continue; | |
1722 | } | |
1723 | StringResource *same = res->fSame; | |
1724 | assert(res->length() != same->length()); // Set strings are unique. | |
1725 | res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset; | |
1726 | int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit; | |
1727 | // Suffixes of pool strings have been set already. | |
1728 | assert(localStringIndex >= 0); | |
1729 | if (localStringIndex >= fLocalStringIndexLimit) { | |
1730 | fLocalStringIndexLimit = localStringIndex + 1; | |
1731 | } | |
1732 | res->fWritten = TRUE; | |
1733 | } | |
1734 | } | |
1735 | // +1 to account for the initial zero in f16BitUnits | |
1736 | assert(f16BitUnits.length() <= (f16BitStringsLength + 1)); | |
1737 | } | |
3d1f044b A |
1738 | |
1739 | void SResource::applyFilter( | |
1740 | const PathFilter& /*filter*/, | |
1741 | ResKeyPath& /*path*/, | |
1742 | const SRBRoot* /*bundle*/) { | |
1743 | // Only a few resource types (tables) are capable of being filtered. | |
1744 | } | |
1745 | ||
1746 | void TableResource::applyFilter( | |
1747 | const PathFilter& filter, | |
1748 | ResKeyPath& path, | |
1749 | const SRBRoot* bundle) { | |
1750 | SResource* prev = nullptr; | |
1751 | SResource* curr = fFirst; | |
1752 | for (; curr != nullptr;) { | |
1753 | path.push(curr->getKeyString(bundle)); | |
1754 | auto inclusion = filter.match(path); | |
1755 | if (inclusion == PathFilter::EInclusion::INCLUDE) { | |
1756 | // Include whole subtree | |
1757 | // no-op | |
1758 | if (isVerbose()) { | |
1759 | std::cout << "genrb subtree: " << bundle->fLocale << ": INCLUDE: " << path << std::endl; | |
1760 | } | |
1761 | } else if (inclusion == PathFilter::EInclusion::EXCLUDE) { | |
1762 | // Reject the whole subtree | |
1763 | // Remove it from the linked list | |
1764 | if (isVerbose()) { | |
1765 | std::cout << "genrb subtree: " << bundle->fLocale << ": DELETE: " << path << std::endl; | |
1766 | } | |
1767 | if (prev == nullptr) { | |
1768 | fFirst = curr->fNext; | |
1769 | } else { | |
1770 | prev->fNext = curr->fNext; | |
1771 | } | |
1772 | fCount--; | |
1773 | delete curr; | |
1774 | curr = prev; | |
1775 | } else { | |
1776 | U_ASSERT(inclusion == PathFilter::EInclusion::PARTIAL); | |
1777 | // Recurse into the child | |
1778 | curr->applyFilter(filter, path, bundle); | |
1779 | } | |
1780 | path.pop(); | |
1781 | ||
1782 | prev = curr; | |
1783 | if (curr == nullptr) { | |
1784 | curr = fFirst; | |
1785 | } else { | |
1786 | curr = curr->fNext; | |
1787 | } | |
1788 | } | |
1789 | } |