]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/reslist.cpp
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / reslist.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2000-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File reslist.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 02/21/00 weiv Creation.
15 *******************************************************************************
16 */
17
18 // Safer use of UnicodeString.
19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
20 # define UNISTR_FROM_CHAR_EXPLICIT explicit
21 #endif
22
23 // Less important, but still a good idea.
24 #ifndef UNISTR_FROM_STRING_EXPLICIT
25 # define UNISTR_FROM_STRING_EXPLICIT explicit
26 #endif
27
28 #include <assert.h>
29 #include <stdio.h>
30 #include "unicode/localpointer.h"
31 #include "reslist.h"
32 #include "unewdata.h"
33 #include "unicode/ures.h"
34 #include "unicode/putil.h"
35 #include "errmsg.h"
36
37 #include "uarrsort.h"
38 #include "uelement.h"
39 #include "uhash.h"
40 #include "uinvchar.h"
41 #include "ustr_imp.h"
42 #include "unicode/utf16.h"
43 /*
44 * Align binary data at a 16-byte offset from the start of the resource bundle,
45 * to be safe for any data type it may contain.
46 */
47 #define BIN_ALIGNMENT 16
48
49 // This numeric constant must be at least 1.
50 // If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
51 // and it makes no sense to move it to the pool bundle.
52 // The larger the threshold for fNumUnitsSaved
53 // the smaller the savings, and the smaller the pool bundle.
54 // We trade some total size reduction to reduce the pool bundle a bit,
55 // so that one can reasonably save data size by
56 // removing bundle files without rebuilding the pool bundle.
57 // This can also help to keep the pool and total (pool+local) string indexes
58 // within 16 bits, that is, within range of Table16 and Array16 containers.
59 #ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
60 # define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
61 #endif
62
63 U_NAMESPACE_USE
64
65 static UBool gIncludeCopyright = FALSE;
66 static UBool gUsePoolBundle = FALSE;
67 static UBool gIsDefaultFormatVersion = TRUE;
68 static int32_t gFormatVersion = 3;
69
70 /* How do we store string values? */
71 enum {
72 STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
73 STRINGS_UTF16_V2 /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
74 };
75
76 static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40; /* do not store the length explicitly for such strings */
77
78 static const ResFile kNoPoolBundle;
79
80 /*
81 * res_none() returns the address of kNoResource,
82 * for use in non-error cases when no resource is to be added to the bundle.
83 * (NULL is used in error cases.)
84 */
85 static SResource kNoResource; // TODO: const
86
87 static UDataInfo dataInfo= {
88 sizeof(UDataInfo),
89 0,
90
91 U_IS_BIG_ENDIAN,
92 U_CHARSET_FAMILY,
93 sizeof(UChar),
94 0,
95
96 {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */
97 {1, 3, 0, 0}, /* formatVersion */
98 {1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/
99 };
100
101 static const UVersionInfo gFormatVersions[4] = { /* indexed by a major-formatVersion integer */
102 { 0, 0, 0, 0 },
103 { 1, 3, 0, 0 },
104 { 2, 0, 0, 0 },
105 { 3, 0, 0, 0 }
106 };
107 // Remember to update genrb.h GENRB_VERSION when changing the data format.
108 // (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
109
/**
 * Returns the number of bytes (0..3) that must be appended after `size`
 * bytes so that the total is a multiple of sizeof(uint32_t).
 */
static uint8_t calcPadding(uint32_t size) {
    const uint32_t remainder = (uint32_t)(size % sizeof(uint32_t));
    if (remainder == 0) {
        /* already aligned, nothing to pad */
        return 0;
    }
    return (uint8_t)(sizeof(uint32_t) - remainder);
}
115
116 void setIncludeCopyright(UBool val){
117 gIncludeCopyright=val;
118 }
119
120 UBool getIncludeCopyright(void){
121 return gIncludeCopyright;
122 }
123
124 void setFormatVersion(int32_t formatVersion) {
125 gIsDefaultFormatVersion = FALSE;
126 gFormatVersion = formatVersion;
127 }
128
129 int32_t getFormatVersion() {
130 return gFormatVersion;
131 }
132
133 void setUsePoolBundle(UBool use) {
134 gUsePoolBundle = use;
135 }
136
137 // TODO: return const pointer, or find another way to express "none"
138 struct SResource* res_none() {
139 return &kNoResource;
140 }
141
142 SResource::SResource()
143 : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
144 line(0), fNext(NULL) {
145 ustr_init(&fComment);
146 }
147
148 SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
149 UErrorCode &errorCode)
150 : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1),
151 fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
152 line(0), fNext(NULL) {
153 ustr_init(&fComment);
154 if(comment != NULL) {
155 ustr_cpy(&fComment, comment, &errorCode);
156 }
157 }
158
159 SResource::~SResource() {
160 ustr_deinit(&fComment);
161 }
162
163 ContainerResource::~ContainerResource() {
164 SResource *current = fFirst;
165 while (current != NULL) {
166 SResource *next = current->fNext;
167 delete current;
168 current = next;
169 }
170 }
171
172 TableResource::~TableResource() {}
173
174 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
175 void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) {
176 if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) {
177 return;
178 }
179
180 /* remember this linenumber to report to the user if there is a duplicate key */
181 res->line = linenumber;
182
183 /* here we need to traverse the list */
184 ++fCount;
185
186 /* is the list still empty? */
187 if (fFirst == NULL) {
188 fFirst = res;
189 res->fNext = NULL;
190 return;
191 }
192
193 const char *resKeyString = fRoot->fKeys + res->fKey;
194
195 SResource *current = fFirst;
196
197 SResource *prev = NULL;
198 while (current != NULL) {
199 const char *currentKeyString = fRoot->fKeys + current->fKey;
200 int diff;
201 /*
202 * formatVersion 1: compare key strings in native-charset order
203 * formatVersion 2 and up: compare key strings in ASCII order
204 */
205 if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
206 diff = uprv_strcmp(currentKeyString, resKeyString);
207 } else {
208 diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
209 }
210 if (diff < 0) {
211 prev = current;
212 current = current->fNext;
213 } else if (diff > 0) {
214 /* we're either in front of the list, or in the middle */
215 if (prev == NULL) {
216 /* front of the list */
217 fFirst = res;
218 } else {
219 /* middle of the list */
220 prev->fNext = res;
221 }
222
223 res->fNext = current;
224 return;
225 } else {
226 /* Key already exists! ERROR! */
227 error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
228 errorCode = U_UNSUPPORTED_ERROR;
229 return;
230 }
231 }
232
233 /* end of list */
234 prev->fNext = res;
235 res->fNext = NULL;
236 }
237
238 ArrayResource::~ArrayResource() {}
239
240 void ArrayResource::add(SResource *res) {
241 if (res != NULL && res != &kNoResource) {
242 if (fFirst == NULL) {
243 fFirst = res;
244 } else {
245 fLast->fNext = res;
246 }
247 fLast = res;
248 ++fCount;
249 }
250 }
251
252 PseudoListResource::~PseudoListResource() {}
253
254 void PseudoListResource::add(SResource *res) {
255 if (res != NULL && res != &kNoResource) {
256 res->fNext = fFirst;
257 fFirst = res;
258 ++fCount;
259 }
260 }
261
262 StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
263 const UChar *value, int32_t len,
264 const UString* comment, UErrorCode &errorCode)
265 : SResource(bundle, tag, type, comment, errorCode) {
266 if (len == 0 && gFormatVersion > 1) {
267 fRes = URES_MAKE_EMPTY_RESOURCE(type);
268 fWritten = TRUE;
269 return;
270 }
271
272 fString.setTo(value, len);
273 fString.getTerminatedBuffer(); // Some code relies on NUL-termination.
274 if (U_SUCCESS(errorCode) && fString.isBogus()) {
275 errorCode = U_MEMORY_ALLOCATION_ERROR;
276 }
277 }
278
279 StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
280 const icu::UnicodeString &value, UErrorCode &errorCode)
281 : SResource(bundle, NULL, type, NULL, errorCode), fString(value) {
282 if (value.isEmpty() && gFormatVersion > 1) {
283 fRes = URES_MAKE_EMPTY_RESOURCE(type);
284 fWritten = TRUE;
285 return;
286 }
287
288 fString.getTerminatedBuffer(); // Some code relies on NUL-termination.
289 if (U_SUCCESS(errorCode) && fString.isBogus()) {
290 errorCode = U_MEMORY_ALLOCATION_ERROR;
291 }
292 }
293
294 // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
295 StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len,
296 UErrorCode &errorCode)
297 : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) {
298 assert(len > 0);
299 assert(!fString.isBogus());
300 }
301
302 StringBaseResource::~StringBaseResource() {}
303
304 static int32_t U_CALLCONV
305 string_hash(const UElement key) {
306 const StringResource *res = static_cast<const StringResource *>(key.pointer);
307 return res->fString.hashCode();
308 }
309
310 static UBool U_CALLCONV
311 string_comp(const UElement key1, const UElement key2) {
312 const StringResource *res1 = static_cast<const StringResource *>(key1.pointer);
313 const StringResource *res2 = static_cast<const StringResource *>(key2.pointer);
314 return res1->fString == res2->fString;
315 }
316
317 StringResource::~StringResource() {}
318
319 AliasResource::~AliasResource() {}
320
321 IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value,
322 const UString* comment, UErrorCode &errorCode)
323 : SResource(bundle, tag, URES_INT, comment, errorCode) {
324 fValue = value;
325 fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
326 fWritten = TRUE;
327 }
328
329 IntResource::~IntResource() {}
330
331 IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag,
332 const UString* comment, UErrorCode &errorCode)
333 : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode),
334 fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) {
335 if (fArray == NULL) {
336 errorCode = U_MEMORY_ALLOCATION_ERROR;
337 return;
338 }
339 }
340
341 IntVectorResource::~IntVectorResource() {
342 delete[] fArray;
343 }
344
345 void IntVectorResource::add(int32_t value, UErrorCode &errorCode) {
346 if (U_SUCCESS(errorCode)) {
347 fArray[fCount++] = value;
348 }
349 }
350
351 BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag,
352 uint32_t length, uint8_t *data, const char* fileName,
353 const UString* comment, UErrorCode &errorCode)
354 : SResource(bundle, tag, URES_BINARY, comment, errorCode),
355 fLength(length), fData(NULL), fFileName(NULL) {
356 if (U_FAILURE(errorCode)) {
357 return;
358 }
359 if (fileName != NULL && *fileName != 0){
360 fFileName = new char[uprv_strlen(fileName)+1];
361 if (fFileName == NULL) {
362 errorCode = U_MEMORY_ALLOCATION_ERROR;
363 return;
364 }
365 uprv_strcpy(fFileName, fileName);
366 }
367 if (length > 0) {
368 fData = new uint8_t[length];
369 if (fData == NULL) {
370 errorCode = U_MEMORY_ALLOCATION_ERROR;
371 return;
372 }
373 uprv_memcpy(fData, data, length);
374 } else {
375 if (gFormatVersion > 1) {
376 fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
377 fWritten = TRUE;
378 }
379 }
380 }
381
382 BinaryResource::~BinaryResource() {
383 delete[] fData;
384 delete[] fFileName;
385 }
386
387 /* Writing Functions */
388
389 void
390 StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
391 UErrorCode &errorCode) {
392 assert(fSame == NULL);
393 fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
394 if (fSame != NULL) {
395 // This is a duplicate of a pool bundle string or of an earlier-visited string.
396 if (++fSame->fNumCopies == 1) {
397 assert(fSame->fWritten);
398 int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
399 if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
400 bundle->fPoolStringIndexLimit = poolStringIndex + 1;
401 }
402 }
403 return;
404 }
405 /* Put this string into the set for finding duplicates. */
406 fNumCopies = 1;
407 uhash_put(stringSet, this, this, &errorCode);
408
409 if (bundle->fStringsForm != STRINGS_UTF16_V1) {
410 int32_t len = length();
411 if (len <= MAX_IMPLICIT_STRING_LENGTH &&
412 !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) {
413 /*
414 * This string will be stored without an explicit length.
415 * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
416 */
417 fNumCharsForLength = 0;
418 } else if (len <= 0x3ee) {
419 fNumCharsForLength = 1;
420 } else if (len <= 0xfffff) {
421 fNumCharsForLength = 2;
422 } else {
423 fNumCharsForLength = 3;
424 }
425 bundle->f16BitStringsLength += fNumCharsForLength + len + 1; /* +1 for the NUL */
426 }
427 }
428
429 void
430 ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
431 UErrorCode &errorCode) {
432 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
433 current->preflightStrings(bundle, stringSet, errorCode);
434 }
435 }
436
437 void
438 SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) {
439 if (U_FAILURE(errorCode)) {
440 return;
441 }
442 if (fRes != RES_BOGUS) {
443 /*
444 * The resource item word was already precomputed, which means
445 * no further data needs to be written.
446 * This might be an integer, or an empty string/binary/etc.
447 */
448 return;
449 }
450 handlePreflightStrings(bundle, stringSet, errorCode);
451 }
452
453 void
454 SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/,
455 UErrorCode & /*errorCode*/) {
456 /* Neither a string nor a container. */
457 }
458
459 int32_t
460 SRBRoot::makeRes16(uint32_t resWord) const {
461 if (resWord == 0) {
462 return 0; /* empty string */
463 }
464 uint32_t type = RES_GET_TYPE(resWord);
465 int32_t offset = (int32_t)RES_GET_OFFSET(resWord);
466 if (type == URES_STRING_V2) {
467 assert(offset > 0);
468 if (offset < fPoolStringIndexLimit) {
469 if (offset < fPoolStringIndex16Limit) {
470 return offset;
471 }
472 } else {
473 offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;
474 if (offset <= 0xffff) {
475 return offset;
476 }
477 }
478 }
479 return -1;
480 }
481
482 int32_t
483 SRBRoot::mapKey(int32_t oldpos) const {
484 const KeyMapEntry *map = fKeyMap;
485 if (map == NULL) {
486 return oldpos;
487 }
488 int32_t i, start, limit;
489
490 /* do a binary search for the old, pre-compactKeys() key offset */
491 start = fUsePoolBundle->fKeysCount;
492 limit = start + fKeysCount;
493 while (start < limit - 1) {
494 i = (start + limit) / 2;
495 if (oldpos < map[i].oldpos) {
496 limit = i;
497 } else {
498 start = i;
499 }
500 }
501 assert(oldpos == map[start].oldpos);
502 return map[start].newpos;
503 }
504
505 /*
506 * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
507 * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
508 * and exits early.
509 */
510 void
511 StringResource::handleWrite16(SRBRoot * /*bundle*/) {
512 SResource *same;
513 if ((same = fSame) != NULL) {
514 /* This is a duplicate. */
515 assert(same->fRes != RES_BOGUS && same->fWritten);
516 fRes = same->fRes;
517 fWritten = same->fWritten;
518 }
519 }
520
521 void
522 ContainerResource::writeAllRes16(SRBRoot *bundle) {
523 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
524 bundle->f16BitUnits.append((UChar)current->fRes16);
525 }
526 fWritten = TRUE;
527 }
528
529 void
530 ArrayResource::handleWrite16(SRBRoot *bundle) {
531 if (fCount == 0 && gFormatVersion > 1) {
532 fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
533 fWritten = TRUE;
534 return;
535 }
536
537 int32_t res16 = 0;
538 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
539 current->write16(bundle);
540 res16 |= current->fRes16;
541 }
542 if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
543 fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
544 bundle->f16BitUnits.append((UChar)fCount);
545 writeAllRes16(bundle);
546 }
547 }
548
549 void
550 TableResource::handleWrite16(SRBRoot *bundle) {
551 if (fCount == 0 && gFormatVersion > 1) {
552 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
553 fWritten = TRUE;
554 return;
555 }
556 /* Find the smallest table type that fits the data. */
557 int32_t key16 = 0;
558 int32_t res16 = 0;
559 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
560 current->write16(bundle);
561 key16 |= current->fKey16;
562 res16 |= current->fRes16;
563 }
564 if(fCount > (uint32_t)bundle->fMaxTableLength) {
565 bundle->fMaxTableLength = fCount;
566 }
567 if (fCount <= 0xffff && key16 >= 0) {
568 if (res16 >= 0 && gFormatVersion > 1) {
569 /* 16-bit count, key offsets and values */
570 fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
571 bundle->f16BitUnits.append((UChar)fCount);
572 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
573 bundle->f16BitUnits.append((UChar)current->fKey16);
574 }
575 writeAllRes16(bundle);
576 } else {
577 /* 16-bit count, 16-bit key offsets, 32-bit values */
578 fTableType = URES_TABLE;
579 }
580 } else {
581 /* 32-bit count, key offsets and values */
582 fTableType = URES_TABLE32;
583 }
584 }
585
586 void
587 PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {
588 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
589 fWritten = TRUE;
590 }
591
592 void
593 SResource::write16(SRBRoot *bundle) {
594 if (fKey >= 0) {
595 // A tagged resource has a non-negative key index into the parsed key strings.
596 // compactKeys() built a map from parsed key index to the final key index.
597 // After the mapping, negative key indexes are used for shared pool bundle keys.
598 fKey = bundle->mapKey(fKey);
599 // If the key index fits into a Key16 for a Table or Table16,
600 // then set the fKey16 field accordingly.
601 // Otherwise keep it at -1.
602 if (fKey >= 0) {
603 if (fKey < bundle->fLocalKeyLimit) {
604 fKey16 = fKey;
605 }
606 } else {
607 int32_t poolKeyIndex = fKey & 0x7fffffff;
608 if (poolKeyIndex <= 0xffff) {
609 poolKeyIndex += bundle->fLocalKeyLimit;
610 if (poolKeyIndex <= 0xffff) {
611 fKey16 = poolKeyIndex;
612 }
613 }
614 }
615 }
616 /*
617 * fRes != RES_BOGUS:
618 * The resource item word was already precomputed, which means
619 * no further data needs to be written.
620 * This might be an integer, or an empty or UTF-16 v2 string,
621 * an empty binary, etc.
622 */
623 if (fRes == RES_BOGUS) {
624 handleWrite16(bundle);
625 }
626 // Compute fRes16 for precomputed as well as just-computed fRes.
627 fRes16 = bundle->makeRes16(fRes);
628 }
629
630 void
631 SResource::handleWrite16(SRBRoot * /*bundle*/) {
632 /* Only a few resource types write 16-bit units. */
633 }
634
635 /*
636 * Only called for UTF-16 v1 strings, and for aliases.
637 * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
638 * and exits early.
639 */
640 void
641 StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
642 /* Write the UTF-16 v1 string. */
643 fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
644 *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
645 }
646
647 void
648 IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
649 if (fCount == 0 && gFormatVersion > 1) {
650 fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
651 fWritten = TRUE;
652 } else {
653 fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
654 *byteOffset += (1 + fCount) * 4;
655 }
656 }
657
658 void
659 BinaryResource::handlePreWrite(uint32_t *byteOffset) {
660 uint32_t pad = 0;
661 uint32_t dataStart = *byteOffset + sizeof(fLength);
662
663 if (dataStart % BIN_ALIGNMENT) {
664 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
665 *byteOffset += pad; /* pad == 4 or 8 or 12 */
666 }
667 fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
668 *byteOffset += 4 + fLength;
669 }
670
671 void
672 ContainerResource::preWriteAllRes(uint32_t *byteOffset) {
673 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
674 current->preWrite(byteOffset);
675 }
676 }
677
678 void
679 ArrayResource::handlePreWrite(uint32_t *byteOffset) {
680 preWriteAllRes(byteOffset);
681 fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
682 *byteOffset += (1 + fCount) * 4;
683 }
684
685 void
686 TableResource::handlePreWrite(uint32_t *byteOffset) {
687 preWriteAllRes(byteOffset);
688 if (fTableType == URES_TABLE) {
689 /* 16-bit count, 16-bit key offsets, 32-bit values */
690 fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
691 *byteOffset += 2 + fCount * 6;
692 } else {
693 /* 32-bit count, key offsets and values */
694 fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
695 *byteOffset += 4 + fCount * 8;
696 }
697 }
698
699 void
700 SResource::preWrite(uint32_t *byteOffset) {
701 if (fRes != RES_BOGUS) {
702 /*
703 * The resource item word was already precomputed, which means
704 * no further data needs to be written.
705 * This might be an integer, or an empty or UTF-16 v2 string,
706 * an empty binary, etc.
707 */
708 return;
709 }
710 handlePreWrite(byteOffset);
711 *byteOffset += calcPadding(*byteOffset);
712 }
713
714 void
715 SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
716 assert(FALSE);
717 }
718
719 /*
720 * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
721 * write() sees fWritten and exits early.
722 */
723 void
724 StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
725 /* Write the UTF-16 v1 string. */
726 int32_t len = length();
727 udata_write32(mem, len);
728 udata_writeUString(mem, getBuffer(), len + 1);
729 *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR;
730 fWritten = TRUE;
731 }
732
733 void
734 ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {
735 uint32_t i = 0;
736 for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
737 current->write(mem, byteOffset);
738 }
739 assert(i == fCount);
740 }
741
742 void
743 ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {
744 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
745 udata_write32(mem, current->fRes);
746 }
747 *byteOffset += fCount * 4;
748 }
749
750 void
751 ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
752 writeAllRes(mem, byteOffset);
753 udata_write32(mem, fCount);
754 *byteOffset += 4;
755 writeAllRes32(mem, byteOffset);
756 }
757
758 void
759 IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
760 udata_write32(mem, fCount);
761 for(uint32_t i = 0; i < fCount; ++i) {
762 udata_write32(mem, fArray[i]);
763 }
764 *byteOffset += (1 + fCount) * 4;
765 }
766
767 void
768 BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
769 uint32_t pad = 0;
770 uint32_t dataStart = *byteOffset + sizeof(fLength);
771
772 if (dataStart % BIN_ALIGNMENT) {
773 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
774 udata_writePadding(mem, pad); /* pad == 4 or 8 or 12 */
775 *byteOffset += pad;
776 }
777
778 udata_write32(mem, fLength);
779 if (fLength > 0) {
780 udata_writeBlock(mem, fData, fLength);
781 }
782 *byteOffset += 4 + fLength;
783 }
784
785 void
786 TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
787 writeAllRes(mem, byteOffset);
788 if(fTableType == URES_TABLE) {
789 udata_write16(mem, (uint16_t)fCount);
790 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
791 udata_write16(mem, current->fKey16);
792 }
793 *byteOffset += (1 + fCount)* 2;
794 if ((fCount & 1) == 0) {
795 /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
796 udata_writePadding(mem, 2);
797 *byteOffset += 2;
798 }
799 } else /* URES_TABLE32 */ {
800 udata_write32(mem, fCount);
801 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
802 udata_write32(mem, (uint32_t)current->fKey);
803 }
804 *byteOffset += (1 + fCount)* 4;
805 }
806 writeAllRes32(mem, byteOffset);
807 }
808
809 void
810 SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
811 if (fWritten) {
812 assert(fRes != RES_BOGUS);
813 return;
814 }
815 handleWrite(mem, byteOffset);
816 uint8_t paddingSize = calcPadding(*byteOffset);
817 if (paddingSize > 0) {
818 udata_writePadding(mem, paddingSize);
819 *byteOffset += paddingSize;
820 }
821 fWritten = TRUE;
822 }
823
824 void
825 SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {
826 assert(FALSE);
827 }
828
829 void SRBRoot::write(const char *outputDir, const char *outputPkg,
830 char *writtenFilename, int writtenFilenameLen,
831 UErrorCode &errorCode) {
832 UNewDataMemory *mem = NULL;
833 uint32_t byteOffset = 0;
834 uint32_t top, size;
835 char dataName[1024];
836 int32_t indexes[URES_INDEX_TOP];
837
838 compactKeys(errorCode);
839 /*
840 * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
841 * Safe because the capacity is a multiple of 4.
842 */
843 while (fKeysTop & 3) {
844 fKeys[fKeysTop++] = (char)0xaa;
845 }
846 /*
847 * In URES_TABLE, use all local key offsets that fit into 16 bits,
848 * and use the remaining 16-bit offsets for pool key offsets
849 * if there are any.
850 * If there are no local keys, then use the whole 16-bit space
851 * for pool key offsets.
852 * Note: This cannot be changed without changing the major formatVersion.
853 */
854 if (fKeysBottom < fKeysTop) {
855 if (fKeysTop <= 0x10000) {
856 fLocalKeyLimit = fKeysTop;
857 } else {
858 fLocalKeyLimit = 0x10000;
859 }
860 } else {
861 fLocalKeyLimit = 0;
862 }
863
864 UHashtable *stringSet;
865 if (gFormatVersion > 1) {
866 stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
867 if (U_SUCCESS(errorCode) &&
868 fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) {
869 for (SResource *current = fUsePoolBundle->fStrings->fFirst;
870 current != NULL;
871 current = current->fNext) {
872 StringResource *sr = static_cast<StringResource *>(current);
873 sr->fNumCopies = 0;
874 sr->fNumUnitsSaved = 0;
875 uhash_put(stringSet, sr, sr, &errorCode);
876 }
877 }
878 fRoot->preflightStrings(this, stringSet, errorCode);
879 } else {
880 stringSet = NULL;
881 }
882 if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) {
883 compactStringsV2(stringSet, errorCode);
884 }
885 uhash_close(stringSet);
886 if (U_FAILURE(errorCode)) {
887 return;
888 }
889
890 int32_t formatVersion = gFormatVersion;
891 if (fPoolStringIndexLimit != 0) {
892 int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
893 if ((sum - 1) > RES_MAX_OFFSET) {
894 errorCode = U_BUFFER_OVERFLOW_ERROR;
895 return;
896 }
897 if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
898 // 16-bit indexes work for all pool + local strings.
899 fPoolStringIndex16Limit = fPoolStringIndexLimit;
900 } else {
901 // Set the pool index threshold so that 16-bit indexes work
902 // for some pool strings and some local strings.
903 fPoolStringIndex16Limit = (int32_t)(
904 ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
905 }
906 } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) {
907 // If we just default to formatVersion 3
908 // but there are no pool bundle strings to share
909 // and we do not write a pool bundle,
910 // then write formatVersion 2 which is just as good.
911 formatVersion = 2;
912 }
913
914 fRoot->write16(this);
915 if (f16BitUnits.isBogus()) {
916 errorCode = U_MEMORY_ALLOCATION_ERROR;
917 return;
918 }
919 if (f16BitUnits.length() & 1) {
920 f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */
921 }
922 /* all keys have been mapped */
923 uprv_free(fKeyMap);
924 fKeyMap = NULL;
925
926 byteOffset = fKeysTop + f16BitUnits.length() * 2;
927 fRoot->preWrite(&byteOffset);
928
929 /* total size including the root item */
930 top = byteOffset;
931
932 if (writtenFilename && writtenFilenameLen) {
933 *writtenFilename = 0;
934 }
935
936 if (writtenFilename) {
937 int32_t off = 0, len = 0;
938 if (outputDir) {
939 len = (int32_t)uprv_strlen(outputDir);
940 if (len > writtenFilenameLen) {
941 len = writtenFilenameLen;
942 }
943 uprv_strncpy(writtenFilename, outputDir, len);
944 }
945 if (writtenFilenameLen -= len) {
946 off += len;
947 writtenFilename[off] = U_FILE_SEP_CHAR;
948 if (--writtenFilenameLen) {
949 ++off;
950 if(outputPkg != NULL)
951 {
952 uprv_strcpy(writtenFilename+off, outputPkg);
953 off += (int32_t)uprv_strlen(outputPkg);
954 writtenFilename[off] = '_';
955 ++off;
956 }
957
958 len = (int32_t)uprv_strlen(fLocale);
959 if (len > writtenFilenameLen) {
960 len = writtenFilenameLen;
961 }
962 uprv_strncpy(writtenFilename + off, fLocale, len);
963 if (writtenFilenameLen -= len) {
964 off += len;
965 len = 5;
966 if (len > writtenFilenameLen) {
967 len = writtenFilenameLen;
968 }
969 uprv_strncpy(writtenFilename + off, ".res", len);
970 }
971 }
972 }
973 }
974
975 if(outputPkg)
976 {
977 uprv_strcpy(dataName, outputPkg);
978 uprv_strcat(dataName, "_");
979 uprv_strcat(dataName, fLocale);
980 }
981 else
982 {
983 uprv_strcpy(dataName, fLocale);
984 }
985
986 uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo));
987
988 mem = udata_create(outputDir, "res", dataName,
989 &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode);
990 if(U_FAILURE(errorCode)){
991 return;
992 }
993
994 /* write the root item */
995 udata_write32(mem, fRoot->fRes);
996
997 /*
998 * formatVersion 1.1 (ICU 2.8):
999 * write int32_t indexes[] after root and before the key strings
1000 * to make it easier to parse resource bundles in icuswap or from Java etc.
1001 */
1002 uprv_memset(indexes, 0, sizeof(indexes));
1003 indexes[URES_INDEX_LENGTH]= fIndexLength;
1004 indexes[URES_INDEX_KEYS_TOP]= fKeysTop>>2;
1005 indexes[URES_INDEX_RESOURCES_TOP]= (int32_t)(top>>2);
1006 indexes[URES_INDEX_BUNDLE_TOP]= indexes[URES_INDEX_RESOURCES_TOP];
1007 indexes[URES_INDEX_MAX_TABLE_LENGTH]= fMaxTableLength;
1008
1009 /*
1010 * formatVersion 1.2 (ICU 3.6):
1011 * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
1012 * the memset() above initialized all indexes[] to 0
1013 */
1014 if (fNoFallback) {
1015 indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
1016 }
1017 /*
1018 * formatVersion 2.0 (ICU 4.4):
1019 * more compact string value storage, optional pool bundle
1020 */
1021 if (URES_INDEX_16BIT_TOP < fIndexLength) {
1022 indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1);
1023 }
1024 if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
1025 if (fIsPoolBundle) {
1026 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
1027 uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
1028 (uint32_t)(fKeysTop - fKeysBottom), 0);
1029 if (f16BitUnits.length() <= 1) {
1030 // no pool strings to checksum
1031 } else if (U_IS_BIG_ENDIAN) {
1032 checksum = computeCRC((const char *)f16BitUnits.getBuffer(),
1033 (uint32_t)f16BitUnits.length() * 2, checksum);
1034 } else {
1035 // Swap to big-endian so we get the same checksum on all platforms
1036 // (except for charset family, due to the key strings).
1037 UnicodeString s(f16BitUnits);
1038 s.append((UChar)1); // Ensure that we own this buffer.
1039 assert(!s.isBogus());
1040 uint16_t *p = (uint16_t *)s.getBuffer();
1041 for (int32_t count = f16BitUnits.length(); count > 0; --count) {
1042 uint16_t x = *p;
1043 *p++ = (uint16_t)((x << 8) | (x >> 8));
1044 }
1045 checksum = computeCRC((const char *)p,
1046 (uint32_t)f16BitUnits.length() * 2, checksum);
1047 }
1048 indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
1049 } else if (gUsePoolBundle) {
1050 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
1051 indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
1052 }
1053 }
1054 // formatVersion 3 (ICU 56):
1055 // share string values via pool bundle strings
1056 indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8; // bits 23..0 -> 31..8
1057 indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000; // bits 27..24 -> 15..12
1058 indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
1059
1060 /* write the indexes[] */
1061 udata_writeBlock(mem, indexes, fIndexLength*4);
1062
1063 /* write the table key strings */
1064 udata_writeBlock(mem, fKeys+fKeysBottom,
1065 fKeysTop-fKeysBottom);
1066
1067 /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
1068 udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2);
1069
1070 /* write all of the bundle contents: the root item and its children */
1071 byteOffset = fKeysTop + f16BitUnits.length() * 2;
1072 fRoot->write(mem, &byteOffset);
1073 assert(byteOffset == top);
1074
1075 size = udata_finish(mem, &errorCode);
1076 if(top != size) {
1077 fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n",
1078 (int)size, (int)top);
1079 errorCode = U_INTERNAL_PROGRAM_ERROR;
1080 }
1081 }
1082
1083 /* Opening Functions */
1084
1085 TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1086 LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status);
1087 return U_SUCCESS(*status) ? res.orphan() : NULL;
1088 }
1089
1090 ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1091 LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status);
1092 return U_SUCCESS(*status) ? res.orphan() : NULL;
1093 }
1094
1095 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1096 LocalPointer<SResource> res(
1097 new StringResource(bundle, tag, value, len, comment, *status), *status);
1098 return U_SUCCESS(*status) ? res.orphan() : NULL;
1099 }
1100
1101 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1102 LocalPointer<SResource> res(
1103 new AliasResource(bundle, tag, value, len, comment, *status), *status);
1104 return U_SUCCESS(*status) ? res.orphan() : NULL;
1105 }
1106
1107 IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1108 LocalPointer<IntVectorResource> res(
1109 new IntVectorResource(bundle, tag, comment, *status), *status);
1110 return U_SUCCESS(*status) ? res.orphan() : NULL;
1111 }
1112
1113 struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
1114 LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status);
1115 return U_SUCCESS(*status) ? res.orphan() : NULL;
1116 }
1117
1118 struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
1119 LocalPointer<SResource> res(
1120 new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status);
1121 return U_SUCCESS(*status) ? res.orphan() : NULL;
1122 }
1123
1124 SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode)
1125 : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
1126 fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
1127 fKeys(NULL), fKeyMap(NULL),
1128 fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
1129 f16BitUnits(), f16BitStringsLength(0),
1130 fUsePoolBundle(&kNoPoolBundle),
1131 fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
1132 fWritePoolBundle(NULL) {
1133 if (U_FAILURE(errorCode)) {
1134 return;
1135 }
1136
1137 if (gFormatVersion > 1) {
1138 // f16BitUnits must start with a zero for empty resources.
1139 // We might be able to omit it if there are no empty 16-bit resources.
1140 f16BitUnits.append((UChar)0);
1141 }
1142
1143 fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
1144 if (isPoolBundle) {
1145 fRoot = new PseudoListResource(this, errorCode);
1146 } else {
1147 fRoot = new TableResource(this, NULL, comment, errorCode);
1148 }
1149 if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) {
1150 if (U_SUCCESS(errorCode)) {
1151 errorCode = U_MEMORY_ALLOCATION_ERROR;
1152 }
1153 return;
1154 }
1155
1156 fKeysCapacity = KEY_SPACE_SIZE;
1157 /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
1158 if (gUsePoolBundle || isPoolBundle) {
1159 fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
1160 } else if (gFormatVersion >= 2) {
1161 fIndexLength = URES_INDEX_16BIT_TOP + 1;
1162 } else /* formatVersion 1 */ {
1163 fIndexLength = URES_INDEX_ATTRIBUTES + 1;
1164 }
1165 fKeysBottom = (1 /* root */ + fIndexLength) * 4;
1166 uprv_memset(fKeys, 0, fKeysBottom);
1167 fKeysTop = fKeysBottom;
1168
1169 if (gFormatVersion == 1) {
1170 fStringsForm = STRINGS_UTF16_V1;
1171 } else {
1172 fStringsForm = STRINGS_UTF16_V2;
1173 }
1174 }
1175
1176 /* Closing Functions */
1177
1178 void res_close(struct SResource *res) {
1179 delete res;
1180 }
1181
1182 SRBRoot::~SRBRoot() {
1183 delete fRoot;
1184 uprv_free(fLocale);
1185 uprv_free(fKeys);
1186 uprv_free(fKeyMap);
1187 }
1188
1189 /* Misc Functions */
1190
1191 void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) {
1192 if(U_FAILURE(errorCode)) {
1193 return;
1194 }
1195
1196 uprv_free(fLocale);
1197 fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1));
1198 if(fLocale == NULL) {
1199 errorCode = U_MEMORY_ALLOCATION_ERROR;
1200 return;
1201 }
1202
1203 u_UCharsToChars(locale, fLocale, u_strlen(locale)+1);
1204 }
1205
1206 const char *
1207 SRBRoot::getKeyString(int32_t key) const {
1208 if (key < 0) {
1209 return fUsePoolBundle->fKeys + (key & 0x7fffffff);
1210 } else {
1211 return fKeys + key;
1212 }
1213 }
1214
1215 const char *
1216 SResource::getKeyString(const SRBRoot *bundle) const {
1217 if (fKey == -1) {
1218 return NULL;
1219 }
1220 return bundle->getKeyString(fKey);
1221 }
1222
1223 const char *
1224 SRBRoot::getKeyBytes(int32_t *pLength) const {
1225 *pLength = fKeysTop - fKeysBottom;
1226 return fKeys + fKeysBottom;
1227 }
1228
1229 int32_t
1230 SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
1231 int32_t keypos;
1232
1233 if (U_FAILURE(errorCode)) {
1234 return -1;
1235 }
1236 if (length < 0 || (keyBytes == NULL && length != 0)) {
1237 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1238 return -1;
1239 }
1240 if (length == 0) {
1241 return fKeysTop;
1242 }
1243
1244 keypos = fKeysTop;
1245 fKeysTop += length;
1246 if (fKeysTop >= fKeysCapacity) {
1247 /* overflow - resize the keys buffer */
1248 fKeysCapacity += KEY_SPACE_SIZE;
1249 fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity));
1250 if(fKeys == NULL) {
1251 errorCode = U_MEMORY_ALLOCATION_ERROR;
1252 return -1;
1253 }
1254 }
1255
1256 uprv_memcpy(fKeys + keypos, keyBytes, length);
1257
1258 return keypos;
1259 }
1260
1261 int32_t
1262 SRBRoot::addTag(const char *tag, UErrorCode &errorCode) {
1263 int32_t keypos;
1264
1265 if (U_FAILURE(errorCode)) {
1266 return -1;
1267 }
1268
1269 if (tag == NULL) {
1270 /* no error: the root table and array items have no keys */
1271 return -1;
1272 }
1273
1274 keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode);
1275 if (U_SUCCESS(errorCode)) {
1276 ++fKeysCount;
1277 }
1278 return keypos;
1279 }
1280
/*
 * Three-way comparison of two possibly-negative key offsets.
 * Returns -1, 0 or 1 for lPos less than, equal to or greater than rPos.
 * Deliberately avoids lPos - rPos, which can overflow for operands of
 * opposite sign.
 */
static int32_t
compareInt32(int32_t lPos, int32_t rPos) {
    return (int32_t)(lPos > rPos) - (int32_t)(lPos < rPos);
}
1295
1296 static int32_t U_CALLCONV
1297 compareKeySuffixes(const void *context, const void *l, const void *r) {
1298 const struct SRBRoot *bundle=(const struct SRBRoot *)context;
1299 int32_t lPos = ((const KeyMapEntry *)l)->oldpos;
1300 int32_t rPos = ((const KeyMapEntry *)r)->oldpos;
1301 const char *lStart = bundle->getKeyString(lPos);
1302 const char *lLimit = lStart;
1303 const char *rStart = bundle->getKeyString(rPos);
1304 const char *rLimit = rStart;
1305 int32_t diff;
1306 while (*lLimit != 0) { ++lLimit; }
1307 while (*rLimit != 0) { ++rLimit; }
1308 /* compare keys in reverse character order */
1309 while (lStart < lLimit && rStart < rLimit) {
1310 diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit;
1311 if (diff != 0) {
1312 return diff;
1313 }
1314 }
1315 /* sort equal suffixes by descending key length */
1316 diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart);
1317 if (diff != 0) {
1318 return diff;
1319 }
1320 /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
1321 return compareInt32(lPos, rPos);
1322 }
1323
1324 static int32_t U_CALLCONV
1325 compareKeyNewpos(const void * /*context*/, const void *l, const void *r) {
1326 return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos);
1327 }
1328
1329 static int32_t U_CALLCONV
1330 compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
1331 return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
1332 }
1333
1334 void
1335 SRBRoot::compactKeys(UErrorCode &errorCode) {
1336 KeyMapEntry *map;
1337 char *keys;
1338 int32_t i;
1339 int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
1340 if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
1341 return;
1342 }
1343 map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
1344 if (map == NULL) {
1345 errorCode = U_MEMORY_ALLOCATION_ERROR;
1346 return;
1347 }
1348 keys = (char *)fUsePoolBundle->fKeys;
1349 for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) {
1350 map[i].oldpos =
1351 (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000; /* negative oldpos */
1352 map[i].newpos = 0;
1353 while (*keys != 0) { ++keys; } /* skip the key */
1354 ++keys; /* skip the NUL */
1355 }
1356 keys = fKeys + fKeysBottom;
1357 for (; i < keysCount; ++i) {
1358 map[i].oldpos = (int32_t)(keys - fKeys);
1359 map[i].newpos = 0;
1360 while (*keys != 0) { ++keys; } /* skip the key */
1361 ++keys; /* skip the NUL */
1362 }
1363 /* Sort the keys so that each one is immediately followed by all of its suffixes. */
1364 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1365 compareKeySuffixes, this, FALSE, &errorCode);
1366 /*
1367 * Make suffixes point into earlier, longer strings that contain them
1368 * and mark the old, now unused suffix bytes as deleted.
1369 */
1370 if (U_SUCCESS(errorCode)) {
1371 keys = fKeys;
1372 for (i = 0; i < keysCount;) {
1373 /*
1374 * This key is not a suffix of the previous one;
1375 * keep this one and delete the following ones that are
1376 * suffixes of this one.
1377 */
1378 const char *key;
1379 const char *keyLimit;
1380 int32_t j = i + 1;
1381 map[i].newpos = map[i].oldpos;
1382 if (j < keysCount && map[j].oldpos < 0) {
1383 /* Key string from the pool bundle, do not delete. */
1384 i = j;
1385 continue;
1386 }
1387 key = getKeyString(map[i].oldpos);
1388 for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
1389 for (; j < keysCount && map[j].oldpos >= 0; ++j) {
1390 const char *k;
1391 char *suffix;
1392 const char *suffixLimit;
1393 int32_t offset;
1394 suffix = keys + map[j].oldpos;
1395 for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
1396 offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix);
1397 if (offset < 0) {
1398 break; /* suffix cannot be longer than the original */
1399 }
1400 /* Is it a suffix of the earlier, longer key? */
1401 for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
1402 if (suffix == suffixLimit && *k == *suffixLimit) {
1403 map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */
1404 /* mark the suffix as deleted */
1405 while (*suffix != 0) { *suffix++ = 1; }
1406 *suffix = 1;
1407 } else {
1408 break; /* not a suffix, restart from here */
1409 }
1410 }
1411 i = j;
1412 }
1413 /*
1414 * Re-sort by newpos, then modify the key characters array in-place
1415 * to squeeze out unused bytes, and readjust the newpos offsets.
1416 */
1417 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1418 compareKeyNewpos, NULL, FALSE, &errorCode);
1419 if (U_SUCCESS(errorCode)) {
1420 int32_t oldpos, newpos, limit;
1421 oldpos = newpos = fKeysBottom;
1422 limit = fKeysTop;
1423 /* skip key offsets that point into the pool bundle rather than this new bundle */
1424 for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
1425 if (i < keysCount) {
1426 while (oldpos < limit) {
1427 if (keys[oldpos] == 1) {
1428 ++oldpos; /* skip unused bytes */
1429 } else {
1430 /* adjust the new offsets for keys starting here */
1431 while (i < keysCount && map[i].newpos == oldpos) {
1432 map[i++].newpos = newpos;
1433 }
1434 /* move the key characters to their new position */
1435 keys[newpos++] = keys[oldpos++];
1436 }
1437 }
1438 assert(i == keysCount);
1439 }
1440 fKeysTop = newpos;
1441 /* Re-sort once more, by old offsets for binary searching. */
1442 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
1443 compareKeyOldpos, NULL, FALSE, &errorCode);
1444 if (U_SUCCESS(errorCode)) {
1445 /* key size reduction by limit - newpos */
1446 fKeyMap = map;
1447 map = NULL;
1448 }
1449 }
1450 }
1451 uprv_free(map);
1452 }
1453
1454 static int32_t U_CALLCONV
1455 compareStringSuffixes(const void * /*context*/, const void *l, const void *r) {
1456 const StringResource *left = *((const StringResource **)l);
1457 const StringResource *right = *((const StringResource **)r);
1458 const UChar *lStart = left->getBuffer();
1459 const UChar *lLimit = lStart + left->length();
1460 const UChar *rStart = right->getBuffer();
1461 const UChar *rLimit = rStart + right->length();
1462 int32_t diff;
1463 /* compare keys in reverse character order */
1464 while (lStart < lLimit && rStart < rLimit) {
1465 diff = (int32_t)*--lLimit - (int32_t)*--rLimit;
1466 if (diff != 0) {
1467 return diff;
1468 }
1469 }
1470 /* sort equal suffixes by descending string length */
1471 return right->length() - left->length();
1472 }
1473
1474 static int32_t U_CALLCONV
1475 compareStringLengths(const void * /*context*/, const void *l, const void *r) {
1476 const StringResource *left = *((const StringResource **)l);
1477 const StringResource *right = *((const StringResource **)r);
1478 int32_t diff;
1479 /* Make "is suffix of another string" compare greater than a non-suffix. */
1480 diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL);
1481 if (diff != 0) {
1482 return diff;
1483 }
1484 /* sort by ascending string length */
1485 diff = left->length() - right->length();
1486 if (diff != 0) {
1487 return diff;
1488 }
1489 // sort by descending size reduction
1490 diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
1491 if (diff != 0) {
1492 return diff;
1493 }
1494 // sort lexically
1495 return left->fString.compare(right->fString);
1496 }
1497
1498 void
1499 StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
1500 int32_t len = length();
1501 fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
1502 fWritten = TRUE;
1503 switch(fNumCharsForLength) {
1504 case 0:
1505 break;
1506 case 1:
1507 dest.append((UChar)(0xdc00 + len));
1508 break;
1509 case 2:
1510 dest.append((UChar)(0xdfef + (len >> 16)));
1511 dest.append((UChar)len);
1512 break;
1513 case 3:
1514 dest.append((UChar)0xdfff);
1515 dest.append((UChar)(len >> 16));
1516 dest.append((UChar)len);
1517 break;
1518 default:
1519 break; /* will not occur */
1520 }
1521 dest.append(fString);
1522 dest.append((UChar)0);
1523 }
1524
1525 void
1526 SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
1527 if (U_FAILURE(errorCode)) {
1528 return;
1529 }
1530 // Store the StringResource pointers in an array for
1531 // easy sorting and processing.
1532 // We enumerate a set of strings, so there are no duplicates.
1533 int32_t count = uhash_count(stringSet);
1534 LocalArray<StringResource *> array(new StringResource *[count], errorCode);
1535 if (U_FAILURE(errorCode)) {
1536 return;
1537 }
1538 for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
1539 array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
1540 }
1541 /* Sort the strings so that each one is immediately followed by all of its suffixes. */
1542 uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
1543 compareStringSuffixes, NULL, FALSE, &errorCode);
1544 if (U_FAILURE(errorCode)) {
1545 return;
1546 }
1547 /*
1548 * Make suffixes point into earlier, longer strings that contain them.
1549 * Temporarily use fSame and fSuffixOffset for suffix strings to
1550 * refer to the remaining ones.
1551 */
1552 for (int32_t i = 0; i < count;) {
1553 /*
1554 * This string is not a suffix of the previous one;
1555 * write this one and subsume the following ones that are
1556 * suffixes of this one.
1557 */
1558 StringResource *res = array[i];
1559 res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
1560 // Whole duplicates of pool strings are already account for in fPoolStringIndexLimit,
1561 // see StringResource::handlePreflightStrings().
1562 int32_t j;
1563 for (j = i + 1; j < count; ++j) {
1564 StringResource *suffixRes = array[j];
1565 /* Is it a suffix of the earlier, longer string? */
1566 if (res->fString.endsWith(suffixRes->fString)) {
1567 assert(res->length() != suffixRes->length()); // Set strings are unique.
1568 if (suffixRes->fWritten) {
1569 // Pool string, skip.
1570 } else if (suffixRes->fNumCharsForLength == 0) {
1571 /* yes, point to the earlier string */
1572 suffixRes->fSame = res;
1573 suffixRes->fSuffixOffset = res->length() - suffixRes->length();
1574 if (res->fWritten) {
1575 // Suffix-share res which is a pool string.
1576 // Compute the resource word and collect the maximum.
1577 suffixRes->fRes =
1578 res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
1579 int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
1580 if (poolStringIndex >= fPoolStringIndexLimit) {
1581 fPoolStringIndexLimit = poolStringIndex + 1;
1582 }
1583 suffixRes->fWritten = TRUE;
1584 }
1585 res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
1586 } else {
1587 /* write the suffix by itself if we need explicit length */
1588 }
1589 } else {
1590 break; /* not a suffix, restart from here */
1591 }
1592 }
1593 i = j;
1594 }
1595 /*
1596 * Re-sort the strings by ascending length (except suffixes last)
1597 * to optimize for URES_TABLE16 and URES_ARRAY16:
1598 * Keep as many as possible within reach of 16-bit offsets.
1599 */
1600 uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
1601 compareStringLengths, NULL, FALSE, &errorCode);
1602 if (U_FAILURE(errorCode)) {
1603 return;
1604 }
1605 if (fIsPoolBundle) {
1606 // Write strings that are sufficiently shared.
1607 // Avoid writing other strings.
1608 int32_t numStringsWritten = 0;
1609 int32_t numUnitsSaved = 0;
1610 int32_t numUnitsNotSaved = 0;
1611 for (int32_t i = 0; i < count; ++i) {
1612 StringResource *res = array[i];
1613 // Maximum pool string index when suffix-sharing the last character.
1614 int32_t maxStringIndex =
1615 f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
1616 if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
1617 maxStringIndex < RES_MAX_OFFSET) {
1618 res->writeUTF16v2(0, f16BitUnits);
1619 ++numStringsWritten;
1620 numUnitsSaved += res->fNumUnitsSaved;
1621 } else {
1622 numUnitsNotSaved += res->fNumUnitsSaved;
1623 res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
1624 res->fWritten = TRUE;
1625 }
1626 }
1627 if (f16BitUnits.isBogus()) {
1628 errorCode = U_MEMORY_ALLOCATION_ERROR;
1629 }
1630 if (getShowWarning()) { // not quiet
1631 printf("number of shared strings: %d\n", (int)numStringsWritten);
1632 printf("16-bit units for strings: %6d = %6d bytes\n",
1633 (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
1634 printf("16-bit units saved: %6d = %6d bytes\n",
1635 (int)numUnitsSaved, (int)numUnitsSaved * 2);
1636 printf("16-bit units not saved: %6d = %6d bytes\n",
1637 (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
1638 }
1639 } else {
1640 assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
1641 /* Write the non-suffix strings. */
1642 int32_t i;
1643 for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
1644 StringResource *res = array[i];
1645 if (!res->fWritten) {
1646 int32_t localStringIndex = f16BitUnits.length();
1647 if (localStringIndex >= fLocalStringIndexLimit) {
1648 fLocalStringIndexLimit = localStringIndex + 1;
1649 }
1650 res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
1651 }
1652 }
1653 if (f16BitUnits.isBogus()) {
1654 errorCode = U_MEMORY_ALLOCATION_ERROR;
1655 return;
1656 }
1657 if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
1658 PseudoListResource *poolStrings =
1659 static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
1660 for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
1661 assert(!array[i]->fString.isEmpty());
1662 StringResource *poolString =
1663 new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
1664 if (poolString == NULL) {
1665 errorCode = U_MEMORY_ALLOCATION_ERROR;
1666 break;
1667 }
1668 poolStrings->add(poolString);
1669 }
1670 }
1671 /* Write the suffix strings. Make each point to the real string. */
1672 for (; i < count; ++i) {
1673 StringResource *res = array[i];
1674 if (res->fWritten) {
1675 continue;
1676 }
1677 StringResource *same = res->fSame;
1678 assert(res->length() != same->length()); // Set strings are unique.
1679 res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
1680 int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
1681 // Suffixes of pool strings have been set already.
1682 assert(localStringIndex >= 0);
1683 if (localStringIndex >= fLocalStringIndexLimit) {
1684 fLocalStringIndexLimit = localStringIndex + 1;
1685 }
1686 res->fWritten = TRUE;
1687 }
1688 }
1689 // +1 to account for the initial zero in f16BitUnits
1690 assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
1691 }