1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2000-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
13 * Modification History:
15 * Date Name Description
16 * 02/21/00 weiv Creation.
17 *******************************************************************************
// Safer use of UnicodeString.
#ifndef UNISTR_FROM_CHAR_EXPLICIT
#   define UNISTR_FROM_CHAR_EXPLICIT explicit
#endif

// Less important, but still a good idea.
#ifndef UNISTR_FROM_STRING_EXPLICIT
#   define UNISTR_FROM_STRING_EXPLICIT explicit
#endif
35 #include "unicode/localpointer.h"
38 #include "unicode/ures.h"
39 #include "unicode/putil.h"
48 #include "unicode/utf16.h"
/*
 * Align binary data at a 16-byte offset from the start of the resource bundle,
 * to be safe for any data type it may contain.
 */
#define BIN_ALIGNMENT 16
// This numeric constant must be at least 1.
// If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
// and it makes no sense to move it to the pool bundle.
// The larger the threshold for fNumUnitsSaved
// the smaller the savings, and the smaller the pool bundle.
// We trade some total size reduction to reduce the pool bundle a bit,
// so that one can reasonably save data size by
// removing bundle files without rebuilding the pool bundle.
// This can also help to keep the pool and total (pool+local) string indexes
// within 16 bits, that is, within range of Table16 and Array16 containers.
#ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
#   define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
#endif
73 static UBool gIncludeCopyright
= FALSE
;
74 static UBool gUsePoolBundle
= FALSE
;
75 static UBool gIsDefaultFormatVersion
= TRUE
;
76 static int32_t gFormatVersion
= 3;
/* How do we store string values? */
enum {
    STRINGS_UTF16_V1,   /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
    STRINGS_UTF16_V2    /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
};
84 static const int32_t MAX_IMPLICIT_STRING_LENGTH
= 40; /* do not store the length explicitly for such strings */
86 static const ResFile kNoPoolBundle
;
89 * res_none() returns the address of kNoResource,
90 * for use in non-error cases when no resource is to be added to the bundle.
91 * (NULL is used in error cases.)
93 static SResource kNoResource
; // TODO: const
95 static UDataInfo dataInfo
= {
104 {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */
105 {1, 3, 0, 0}, /* formatVersion */
106 {1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/
109 static const UVersionInfo gFormatVersions
[4] = { /* indexed by a major-formatVersion integer */
115 // Remember to update genrb.h GENRB_VERSION when changing the data format.
116 // (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
/*
 * Returns the number of bytes (0..3) that must be appended to size
 * to reach the next multiple of sizeof(uint32_t).
 */
static uint8_t calcPadding(uint32_t size) {
    const uint32_t remainder = (uint32_t)(size % sizeof(uint32_t));
    if (remainder == 0) {
        return 0;
    }
    return (uint8_t)(sizeof(uint32_t) - remainder);
}
124 void setIncludeCopyright(UBool val
){
125 gIncludeCopyright
=val
;
128 UBool
getIncludeCopyright(void){
129 return gIncludeCopyright
;
132 void setFormatVersion(int32_t formatVersion
) {
133 gIsDefaultFormatVersion
= FALSE
;
134 gFormatVersion
= formatVersion
;
137 int32_t getFormatVersion() {
138 return gFormatVersion
;
141 void setUsePoolBundle(UBool use
) {
142 gUsePoolBundle
= use
;
145 // TODO: return const pointer, or find another way to express "none"
146 struct SResource
* res_none() {
150 SResource::SResource()
151 : fType(URES_NONE
), fWritten(FALSE
), fRes(RES_BOGUS
), fRes16(-1), fKey(-1), fKey16(-1),
152 line(0), fNext(NULL
) {
153 ustr_init(&fComment
);
156 SResource::SResource(SRBRoot
*bundle
, const char *tag
, int8_t type
, const UString
* comment
,
157 UErrorCode
&errorCode
)
158 : fType(type
), fWritten(FALSE
), fRes(RES_BOGUS
), fRes16(-1),
159 fKey(bundle
!= NULL
? bundle
->addTag(tag
, errorCode
) : -1), fKey16(-1),
160 line(0), fNext(NULL
) {
161 ustr_init(&fComment
);
162 if(comment
!= NULL
) {
163 ustr_cpy(&fComment
, comment
, &errorCode
);
167 SResource::~SResource() {
168 ustr_deinit(&fComment
);
171 ContainerResource::~ContainerResource() {
172 SResource
*current
= fFirst
;
173 while (current
!= NULL
) {
174 SResource
*next
= current
->fNext
;
180 TableResource::~TableResource() {}
182 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
183 void TableResource::add(SResource
*res
, int linenumber
, UErrorCode
&errorCode
) {
184 if (U_FAILURE(errorCode
) || res
== NULL
|| res
== &kNoResource
) {
188 /* remember this linenumber to report to the user if there is a duplicate key */
189 res
->line
= linenumber
;
191 /* here we need to traverse the list */
194 /* is the list still empty? */
195 if (fFirst
== NULL
) {
201 const char *resKeyString
= fRoot
->fKeys
+ res
->fKey
;
203 SResource
*current
= fFirst
;
205 SResource
*prev
= NULL
;
206 while (current
!= NULL
) {
207 const char *currentKeyString
= fRoot
->fKeys
+ current
->fKey
;
210 * formatVersion 1: compare key strings in native-charset order
211 * formatVersion 2 and up: compare key strings in ASCII order
213 if (gFormatVersion
== 1 || U_CHARSET_FAMILY
== U_ASCII_FAMILY
) {
214 diff
= uprv_strcmp(currentKeyString
, resKeyString
);
216 diff
= uprv_compareInvCharsAsAscii(currentKeyString
, resKeyString
);
220 current
= current
->fNext
;
221 } else if (diff
> 0) {
222 /* we're either in front of the list, or in the middle */
224 /* front of the list */
227 /* middle of the list */
231 res
->fNext
= current
;
234 /* Key already exists! ERROR! */
235 error(linenumber
, "duplicate key '%s' in table, first appeared at line %d", currentKeyString
, current
->line
);
236 errorCode
= U_UNSUPPORTED_ERROR
;
246 ArrayResource::~ArrayResource() {}
248 void ArrayResource::add(SResource
*res
) {
249 if (res
!= NULL
&& res
!= &kNoResource
) {
250 if (fFirst
== NULL
) {
260 PseudoListResource::~PseudoListResource() {}
262 void PseudoListResource::add(SResource
*res
) {
263 if (res
!= NULL
&& res
!= &kNoResource
) {
270 StringBaseResource::StringBaseResource(SRBRoot
*bundle
, const char *tag
, int8_t type
,
271 const UChar
*value
, int32_t len
,
272 const UString
* comment
, UErrorCode
&errorCode
)
273 : SResource(bundle
, tag
, type
, comment
, errorCode
) {
274 if (len
== 0 && gFormatVersion
> 1) {
275 fRes
= URES_MAKE_EMPTY_RESOURCE(type
);
280 fString
.setTo(ConstChar16Ptr(value
), len
);
281 fString
.getTerminatedBuffer(); // Some code relies on NUL-termination.
282 if (U_SUCCESS(errorCode
) && fString
.isBogus()) {
283 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
287 StringBaseResource::StringBaseResource(SRBRoot
*bundle
, int8_t type
,
288 const icu::UnicodeString
&value
, UErrorCode
&errorCode
)
289 : SResource(bundle
, NULL
, type
, NULL
, errorCode
), fString(value
) {
290 if (value
.isEmpty() && gFormatVersion
> 1) {
291 fRes
= URES_MAKE_EMPTY_RESOURCE(type
);
296 fString
.getTerminatedBuffer(); // Some code relies on NUL-termination.
297 if (U_SUCCESS(errorCode
) && fString
.isBogus()) {
298 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
302 // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
303 StringBaseResource::StringBaseResource(int8_t type
, const UChar
*value
, int32_t len
,
304 UErrorCode
&errorCode
)
305 : SResource(NULL
, NULL
, type
, NULL
, errorCode
), fString(TRUE
, value
, len
) {
307 assert(!fString
.isBogus());
310 StringBaseResource::~StringBaseResource() {}
312 static int32_t U_CALLCONV
313 string_hash(const UElement key
) {
314 const StringResource
*res
= static_cast<const StringResource
*>(key
.pointer
);
315 return res
->fString
.hashCode();
318 static UBool U_CALLCONV
319 string_comp(const UElement key1
, const UElement key2
) {
320 const StringResource
*res1
= static_cast<const StringResource
*>(key1
.pointer
);
321 const StringResource
*res2
= static_cast<const StringResource
*>(key2
.pointer
);
322 return res1
->fString
== res2
->fString
;
325 StringResource::~StringResource() {}
327 AliasResource::~AliasResource() {}
329 IntResource::IntResource(SRBRoot
*bundle
, const char *tag
, int32_t value
,
330 const UString
* comment
, UErrorCode
&errorCode
)
331 : SResource(bundle
, tag
, URES_INT
, comment
, errorCode
) {
333 fRes
= URES_MAKE_RESOURCE(URES_INT
, value
& RES_MAX_OFFSET
);
337 IntResource::~IntResource() {}
339 IntVectorResource::IntVectorResource(SRBRoot
*bundle
, const char *tag
,
340 const UString
* comment
, UErrorCode
&errorCode
)
341 : SResource(bundle
, tag
, URES_INT_VECTOR
, comment
, errorCode
),
342 fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR
]) {
343 if (fArray
== NULL
) {
344 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
349 IntVectorResource::~IntVectorResource() {
353 void IntVectorResource::add(int32_t value
, UErrorCode
&errorCode
) {
354 if (U_SUCCESS(errorCode
)) {
355 fArray
[fCount
++] = value
;
359 BinaryResource::BinaryResource(SRBRoot
*bundle
, const char *tag
,
360 uint32_t length
, uint8_t *data
, const char* fileName
,
361 const UString
* comment
, UErrorCode
&errorCode
)
362 : SResource(bundle
, tag
, URES_BINARY
, comment
, errorCode
),
363 fLength(length
), fData(NULL
), fFileName(NULL
) {
364 if (U_FAILURE(errorCode
)) {
367 if (fileName
!= NULL
&& *fileName
!= 0){
368 fFileName
= new char[uprv_strlen(fileName
)+1];
369 if (fFileName
== NULL
) {
370 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
373 uprv_strcpy(fFileName
, fileName
);
376 fData
= new uint8_t[length
];
378 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
381 uprv_memcpy(fData
, data
, length
);
383 if (gFormatVersion
> 1) {
384 fRes
= URES_MAKE_EMPTY_RESOURCE(URES_BINARY
);
390 BinaryResource::~BinaryResource() {
395 /* Writing Functions */
398 StringResource::handlePreflightStrings(SRBRoot
*bundle
, UHashtable
*stringSet
,
399 UErrorCode
&errorCode
) {
400 assert(fSame
== NULL
);
401 fSame
= static_cast<StringResource
*>(uhash_get(stringSet
, this));
403 // This is a duplicate of a pool bundle string or of an earlier-visited string.
404 if (++fSame
->fNumCopies
== 1) {
405 assert(fSame
->fWritten
);
406 int32_t poolStringIndex
= (int32_t)RES_GET_OFFSET(fSame
->fRes
);
407 if (poolStringIndex
>= bundle
->fPoolStringIndexLimit
) {
408 bundle
->fPoolStringIndexLimit
= poolStringIndex
+ 1;
413 /* Put this string into the set for finding duplicates. */
415 uhash_put(stringSet
, this, this, &errorCode
);
417 if (bundle
->fStringsForm
!= STRINGS_UTF16_V1
) {
418 int32_t len
= length();
419 if (len
<= MAX_IMPLICIT_STRING_LENGTH
&&
420 !U16_IS_TRAIL(fString
[0]) && fString
.indexOf((UChar
)0) < 0) {
422 * This string will be stored without an explicit length.
423 * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
425 fNumCharsForLength
= 0;
426 } else if (len
<= 0x3ee) {
427 fNumCharsForLength
= 1;
428 } else if (len
<= 0xfffff) {
429 fNumCharsForLength
= 2;
431 fNumCharsForLength
= 3;
433 bundle
->f16BitStringsLength
+= fNumCharsForLength
+ len
+ 1; /* +1 for the NUL */
438 ContainerResource::handlePreflightStrings(SRBRoot
*bundle
, UHashtable
*stringSet
,
439 UErrorCode
&errorCode
) {
440 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
441 current
->preflightStrings(bundle
, stringSet
, errorCode
);
446 SResource::preflightStrings(SRBRoot
*bundle
, UHashtable
*stringSet
, UErrorCode
&errorCode
) {
447 if (U_FAILURE(errorCode
)) {
450 if (fRes
!= RES_BOGUS
) {
452 * The resource item word was already precomputed, which means
453 * no further data needs to be written.
454 * This might be an integer, or an empty string/binary/etc.
458 handlePreflightStrings(bundle
, stringSet
, errorCode
);
462 SResource::handlePreflightStrings(SRBRoot
* /*bundle*/, UHashtable
* /*stringSet*/,
463 UErrorCode
& /*errorCode*/) {
464 /* Neither a string nor a container. */
468 SRBRoot::makeRes16(uint32_t resWord
) const {
470 return 0; /* empty string */
472 uint32_t type
= RES_GET_TYPE(resWord
);
473 int32_t offset
= (int32_t)RES_GET_OFFSET(resWord
);
474 if (type
== URES_STRING_V2
) {
476 if (offset
< fPoolStringIndexLimit
) {
477 if (offset
< fPoolStringIndex16Limit
) {
481 offset
= offset
- fPoolStringIndexLimit
+ fPoolStringIndex16Limit
;
482 if (offset
<= 0xffff) {
491 SRBRoot::mapKey(int32_t oldpos
) const {
492 const KeyMapEntry
*map
= fKeyMap
;
496 int32_t i
, start
, limit
;
498 /* do a binary search for the old, pre-compactKeys() key offset */
499 start
= fUsePoolBundle
->fKeysCount
;
500 limit
= start
+ fKeysCount
;
501 while (start
< limit
- 1) {
502 i
= (start
+ limit
) / 2;
503 if (oldpos
< map
[i
].oldpos
) {
509 assert(oldpos
== map
[start
].oldpos
);
510 return map
[start
].newpos
;
514 * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
515 * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
519 StringResource::handleWrite16(SRBRoot
* /*bundle*/) {
521 if ((same
= fSame
) != NULL
) {
522 /* This is a duplicate. */
523 assert(same
->fRes
!= RES_BOGUS
&& same
->fWritten
);
525 fWritten
= same
->fWritten
;
530 ContainerResource::writeAllRes16(SRBRoot
*bundle
) {
531 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
532 bundle
->f16BitUnits
.append((UChar
)current
->fRes16
);
538 ArrayResource::handleWrite16(SRBRoot
*bundle
) {
539 if (fCount
== 0 && gFormatVersion
> 1) {
540 fRes
= URES_MAKE_EMPTY_RESOURCE(URES_ARRAY
);
546 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
547 current
->write16(bundle
);
548 res16
|= current
->fRes16
;
550 if (fCount
<= 0xffff && res16
>= 0 && gFormatVersion
> 1) {
551 fRes
= URES_MAKE_RESOURCE(URES_ARRAY16
, bundle
->f16BitUnits
.length());
552 bundle
->f16BitUnits
.append((UChar
)fCount
);
553 writeAllRes16(bundle
);
558 TableResource::handleWrite16(SRBRoot
*bundle
) {
559 if (fCount
== 0 && gFormatVersion
> 1) {
560 fRes
= URES_MAKE_EMPTY_RESOURCE(URES_TABLE
);
564 /* Find the smallest table type that fits the data. */
567 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
568 current
->write16(bundle
);
569 key16
|= current
->fKey16
;
570 res16
|= current
->fRes16
;
572 if(fCount
> (uint32_t)bundle
->fMaxTableLength
) {
573 bundle
->fMaxTableLength
= fCount
;
575 if (fCount
<= 0xffff && key16
>= 0) {
576 if (res16
>= 0 && gFormatVersion
> 1) {
577 /* 16-bit count, key offsets and values */
578 fRes
= URES_MAKE_RESOURCE(URES_TABLE16
, bundle
->f16BitUnits
.length());
579 bundle
->f16BitUnits
.append((UChar
)fCount
);
580 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
581 bundle
->f16BitUnits
.append((UChar
)current
->fKey16
);
583 writeAllRes16(bundle
);
585 /* 16-bit count, 16-bit key offsets, 32-bit values */
586 fTableType
= URES_TABLE
;
589 /* 32-bit count, key offsets and values */
590 fTableType
= URES_TABLE32
;
595 PseudoListResource::handleWrite16(SRBRoot
* /*bundle*/) {
596 fRes
= URES_MAKE_EMPTY_RESOURCE(URES_TABLE
);
601 SResource::write16(SRBRoot
*bundle
) {
603 // A tagged resource has a non-negative key index into the parsed key strings.
604 // compactKeys() built a map from parsed key index to the final key index.
605 // After the mapping, negative key indexes are used for shared pool bundle keys.
606 fKey
= bundle
->mapKey(fKey
);
607 // If the key index fits into a Key16 for a Table or Table16,
608 // then set the fKey16 field accordingly.
609 // Otherwise keep it at -1.
611 if (fKey
< bundle
->fLocalKeyLimit
) {
615 int32_t poolKeyIndex
= fKey
& 0x7fffffff;
616 if (poolKeyIndex
<= 0xffff) {
617 poolKeyIndex
+= bundle
->fLocalKeyLimit
;
618 if (poolKeyIndex
<= 0xffff) {
619 fKey16
= poolKeyIndex
;
626 * The resource item word was already precomputed, which means
627 * no further data needs to be written.
628 * This might be an integer, or an empty or UTF-16 v2 string,
629 * an empty binary, etc.
631 if (fRes
== RES_BOGUS
) {
632 handleWrite16(bundle
);
634 // Compute fRes16 for precomputed as well as just-computed fRes.
635 fRes16
= bundle
->makeRes16(fRes
);
639 SResource::handleWrite16(SRBRoot
* /*bundle*/) {
640 /* Only a few resource types write 16-bit units. */
644 * Only called for UTF-16 v1 strings, and for aliases.
645 * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
649 StringBaseResource::handlePreWrite(uint32_t *byteOffset
) {
650 /* Write the UTF-16 v1 string. */
651 fRes
= URES_MAKE_RESOURCE(fType
, *byteOffset
>> 2);
652 *byteOffset
+= 4 + (length() + 1) * U_SIZEOF_UCHAR
;
656 IntVectorResource::handlePreWrite(uint32_t *byteOffset
) {
657 if (fCount
== 0 && gFormatVersion
> 1) {
658 fRes
= URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR
);
661 fRes
= URES_MAKE_RESOURCE(URES_INT_VECTOR
, *byteOffset
>> 2);
662 *byteOffset
+= (1 + fCount
) * 4;
667 BinaryResource::handlePreWrite(uint32_t *byteOffset
) {
669 uint32_t dataStart
= *byteOffset
+ sizeof(fLength
);
671 if (dataStart
% BIN_ALIGNMENT
) {
672 pad
= (BIN_ALIGNMENT
- dataStart
% BIN_ALIGNMENT
);
673 *byteOffset
+= pad
; /* pad == 4 or 8 or 12 */
675 fRes
= URES_MAKE_RESOURCE(URES_BINARY
, *byteOffset
>> 2);
676 *byteOffset
+= 4 + fLength
;
680 ContainerResource::preWriteAllRes(uint32_t *byteOffset
) {
681 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
682 current
->preWrite(byteOffset
);
687 ArrayResource::handlePreWrite(uint32_t *byteOffset
) {
688 preWriteAllRes(byteOffset
);
689 fRes
= URES_MAKE_RESOURCE(URES_ARRAY
, *byteOffset
>> 2);
690 *byteOffset
+= (1 + fCount
) * 4;
694 TableResource::handlePreWrite(uint32_t *byteOffset
) {
695 preWriteAllRes(byteOffset
);
696 if (fTableType
== URES_TABLE
) {
697 /* 16-bit count, 16-bit key offsets, 32-bit values */
698 fRes
= URES_MAKE_RESOURCE(URES_TABLE
, *byteOffset
>> 2);
699 *byteOffset
+= 2 + fCount
* 6;
701 /* 32-bit count, key offsets and values */
702 fRes
= URES_MAKE_RESOURCE(URES_TABLE32
, *byteOffset
>> 2);
703 *byteOffset
+= 4 + fCount
* 8;
708 SResource::preWrite(uint32_t *byteOffset
) {
709 if (fRes
!= RES_BOGUS
) {
711 * The resource item word was already precomputed, which means
712 * no further data needs to be written.
713 * This might be an integer, or an empty or UTF-16 v2 string,
714 * an empty binary, etc.
718 handlePreWrite(byteOffset
);
719 *byteOffset
+= calcPadding(*byteOffset
);
723 SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
728 * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
729 * write() sees fWritten and exits early.
732 StringBaseResource::handleWrite(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
733 /* Write the UTF-16 v1 string. */
734 int32_t len
= length();
735 udata_write32(mem
, len
);
736 udata_writeUString(mem
, getBuffer(), len
+ 1);
737 *byteOffset
+= 4 + (len
+ 1) * U_SIZEOF_UCHAR
;
742 ContainerResource::writeAllRes(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
744 for (SResource
*current
= fFirst
; current
!= NULL
; ++i
, current
= current
->fNext
) {
745 current
->write(mem
, byteOffset
);
751 ContainerResource::writeAllRes32(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
752 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
753 udata_write32(mem
, current
->fRes
);
755 *byteOffset
+= fCount
* 4;
759 ArrayResource::handleWrite(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
760 writeAllRes(mem
, byteOffset
);
761 udata_write32(mem
, fCount
);
763 writeAllRes32(mem
, byteOffset
);
767 IntVectorResource::handleWrite(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
768 udata_write32(mem
, fCount
);
769 for(uint32_t i
= 0; i
< fCount
; ++i
) {
770 udata_write32(mem
, fArray
[i
]);
772 *byteOffset
+= (1 + fCount
) * 4;
776 BinaryResource::handleWrite(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
778 uint32_t dataStart
= *byteOffset
+ sizeof(fLength
);
780 if (dataStart
% BIN_ALIGNMENT
) {
781 pad
= (BIN_ALIGNMENT
- dataStart
% BIN_ALIGNMENT
);
782 udata_writePadding(mem
, pad
); /* pad == 4 or 8 or 12 */
786 udata_write32(mem
, fLength
);
788 udata_writeBlock(mem
, fData
, fLength
);
790 *byteOffset
+= 4 + fLength
;
794 TableResource::handleWrite(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
795 writeAllRes(mem
, byteOffset
);
796 if(fTableType
== URES_TABLE
) {
797 udata_write16(mem
, (uint16_t)fCount
);
798 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
799 udata_write16(mem
, current
->fKey16
);
801 *byteOffset
+= (1 + fCount
)* 2;
802 if ((fCount
& 1) == 0) {
803 /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
804 udata_writePadding(mem
, 2);
807 } else /* URES_TABLE32 */ {
808 udata_write32(mem
, fCount
);
809 for (SResource
*current
= fFirst
; current
!= NULL
; current
= current
->fNext
) {
810 udata_write32(mem
, (uint32_t)current
->fKey
);
812 *byteOffset
+= (1 + fCount
)* 4;
814 writeAllRes32(mem
, byteOffset
);
818 SResource::write(UNewDataMemory
*mem
, uint32_t *byteOffset
) {
820 assert(fRes
!= RES_BOGUS
);
823 handleWrite(mem
, byteOffset
);
824 uint8_t paddingSize
= calcPadding(*byteOffset
);
825 if (paddingSize
> 0) {
826 udata_writePadding(mem
, paddingSize
);
827 *byteOffset
+= paddingSize
;
833 SResource::handleWrite(UNewDataMemory
* /*mem*/, uint32_t * /*byteOffset*/) {
837 void SRBRoot::write(const char *outputDir
, const char *outputPkg
,
838 char *writtenFilename
, int writtenFilenameLen
,
839 UErrorCode
&errorCode
) {
840 UNewDataMemory
*mem
= NULL
;
841 uint32_t byteOffset
= 0;
844 int32_t indexes
[URES_INDEX_TOP
];
846 compactKeys(errorCode
);
848 * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
849 * Safe because the capacity is a multiple of 4.
851 while (fKeysTop
& 3) {
852 fKeys
[fKeysTop
++] = (char)0xaa;
855 * In URES_TABLE, use all local key offsets that fit into 16 bits,
856 * and use the remaining 16-bit offsets for pool key offsets
858 * If there are no local keys, then use the whole 16-bit space
859 * for pool key offsets.
860 * Note: This cannot be changed without changing the major formatVersion.
862 if (fKeysBottom
< fKeysTop
) {
863 if (fKeysTop
<= 0x10000) {
864 fLocalKeyLimit
= fKeysTop
;
866 fLocalKeyLimit
= 0x10000;
872 UHashtable
*stringSet
;
873 if (gFormatVersion
> 1) {
874 stringSet
= uhash_open(string_hash
, string_comp
, string_comp
, &errorCode
);
875 if (U_SUCCESS(errorCode
) &&
876 fUsePoolBundle
!= NULL
&& fUsePoolBundle
->fStrings
!= NULL
) {
877 for (SResource
*current
= fUsePoolBundle
->fStrings
->fFirst
;
879 current
= current
->fNext
) {
880 StringResource
*sr
= static_cast<StringResource
*>(current
);
882 sr
->fNumUnitsSaved
= 0;
883 uhash_put(stringSet
, sr
, sr
, &errorCode
);
886 fRoot
->preflightStrings(this, stringSet
, errorCode
);
890 if (fStringsForm
== STRINGS_UTF16_V2
&& f16BitStringsLength
> 0) {
891 compactStringsV2(stringSet
, errorCode
);
893 uhash_close(stringSet
);
894 if (U_FAILURE(errorCode
)) {
898 int32_t formatVersion
= gFormatVersion
;
899 if (fPoolStringIndexLimit
!= 0) {
900 int32_t sum
= fPoolStringIndexLimit
+ fLocalStringIndexLimit
;
901 if ((sum
- 1) > RES_MAX_OFFSET
) {
902 errorCode
= U_BUFFER_OVERFLOW_ERROR
;
905 if (fPoolStringIndexLimit
< 0x10000 && sum
<= 0x10000) {
906 // 16-bit indexes work for all pool + local strings.
907 fPoolStringIndex16Limit
= fPoolStringIndexLimit
;
909 // Set the pool index threshold so that 16-bit indexes work
910 // for some pool strings and some local strings.
911 fPoolStringIndex16Limit
= (int32_t)(
912 ((int64_t)fPoolStringIndexLimit
* 0xffff) / sum
);
914 } else if (gIsDefaultFormatVersion
&& formatVersion
== 3 && !fIsPoolBundle
) {
915 // If we just default to formatVersion 3
916 // but there are no pool bundle strings to share
917 // and we do not write a pool bundle,
918 // then write formatVersion 2 which is just as good.
922 fRoot
->write16(this);
923 if (f16BitUnits
.isBogus()) {
924 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
927 if (f16BitUnits
.length() & 1) {
928 f16BitUnits
.append((UChar
)0xaaaa); /* pad to multiple of 4 bytes */
931 byteOffset
= fKeysTop
+ f16BitUnits
.length() * 2;
932 fRoot
->preWrite(&byteOffset
);
934 /* total size including the root item */
937 if (writtenFilename
&& writtenFilenameLen
) {
938 *writtenFilename
= 0;
941 if (writtenFilename
) {
942 int32_t off
= 0, len
= 0;
944 len
= (int32_t)uprv_strlen(outputDir
);
945 if (len
> writtenFilenameLen
) {
946 len
= writtenFilenameLen
;
948 uprv_strncpy(writtenFilename
, outputDir
, len
);
950 if (writtenFilenameLen
-= len
) {
952 writtenFilename
[off
] = U_FILE_SEP_CHAR
;
953 if (--writtenFilenameLen
) {
955 if(outputPkg
!= NULL
)
957 uprv_strcpy(writtenFilename
+off
, outputPkg
);
958 off
+= (int32_t)uprv_strlen(outputPkg
);
959 writtenFilename
[off
] = '_';
963 len
= (int32_t)uprv_strlen(fLocale
);
964 if (len
> writtenFilenameLen
) {
965 len
= writtenFilenameLen
;
967 uprv_strncpy(writtenFilename
+ off
, fLocale
, len
);
968 if (writtenFilenameLen
-= len
) {
971 if (len
> writtenFilenameLen
) {
972 len
= writtenFilenameLen
;
974 uprv_strncpy(writtenFilename
+ off
, ".res", len
);
982 uprv_strcpy(dataName
, outputPkg
);
983 uprv_strcat(dataName
, "_");
984 uprv_strcat(dataName
, fLocale
);
988 uprv_strcpy(dataName
, fLocale
);
991 uprv_memcpy(dataInfo
.formatVersion
, gFormatVersions
+ formatVersion
, sizeof(UVersionInfo
));
993 mem
= udata_create(outputDir
, "res", dataName
,
994 &dataInfo
, (gIncludeCopyright
==TRUE
)? U_COPYRIGHT_STRING
:NULL
, &errorCode
);
995 if(U_FAILURE(errorCode
)){
999 /* write the root item */
1000 udata_write32(mem
, fRoot
->fRes
);
1003 * formatVersion 1.1 (ICU 2.8):
1004 * write int32_t indexes[] after root and before the key strings
1005 * to make it easier to parse resource bundles in icuswap or from Java etc.
1007 uprv_memset(indexes
, 0, sizeof(indexes
));
1008 indexes
[URES_INDEX_LENGTH
]= fIndexLength
;
1009 indexes
[URES_INDEX_KEYS_TOP
]= fKeysTop
>>2;
1010 indexes
[URES_INDEX_RESOURCES_TOP
]= (int32_t)(top
>>2);
1011 indexes
[URES_INDEX_BUNDLE_TOP
]= indexes
[URES_INDEX_RESOURCES_TOP
];
1012 indexes
[URES_INDEX_MAX_TABLE_LENGTH
]= fMaxTableLength
;
1015 * formatVersion 1.2 (ICU 3.6):
1016 * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
1017 * the memset() above initialized all indexes[] to 0
1020 indexes
[URES_INDEX_ATTRIBUTES
]=URES_ATT_NO_FALLBACK
;
1023 * formatVersion 2.0 (ICU 4.4):
1024 * more compact string value storage, optional pool bundle
1026 if (URES_INDEX_16BIT_TOP
< fIndexLength
) {
1027 indexes
[URES_INDEX_16BIT_TOP
] = (fKeysTop
>>2) + (f16BitUnits
.length()>>1);
1029 if (URES_INDEX_POOL_CHECKSUM
< fIndexLength
) {
1030 if (fIsPoolBundle
) {
1031 indexes
[URES_INDEX_ATTRIBUTES
] |= URES_ATT_IS_POOL_BUNDLE
| URES_ATT_NO_FALLBACK
;
1032 uint32_t checksum
= computeCRC((const char *)(fKeys
+ fKeysBottom
),
1033 (uint32_t)(fKeysTop
- fKeysBottom
), 0);
1034 if (f16BitUnits
.length() <= 1) {
1035 // no pool strings to checksum
1036 } else if (U_IS_BIG_ENDIAN
) {
1037 checksum
= computeCRC(reinterpret_cast<const char *>(f16BitUnits
.getBuffer()),
1038 (uint32_t)f16BitUnits
.length() * 2, checksum
);
1040 // Swap to big-endian so we get the same checksum on all platforms
1041 // (except for charset family, due to the key strings).
1042 UnicodeString
s(f16BitUnits
);
1043 assert(!s
.isBogus());
1044 // .getBuffer(capacity) returns a mutable buffer
1045 char16_t* p
= s
.getBuffer(f16BitUnits
.length());
1046 for (int32_t count
= f16BitUnits
.length(); count
> 0; --count
) {
1048 *p
++ = (uint16_t)((x
<< 8) | (x
>> 8));
1050 s
.releaseBuffer(f16BitUnits
.length());
1051 checksum
= computeCRC((const char *)s
.getBuffer(),
1052 (uint32_t)f16BitUnits
.length() * 2, checksum
);
1054 indexes
[URES_INDEX_POOL_CHECKSUM
] = (int32_t)checksum
;
1055 } else if (gUsePoolBundle
) {
1056 indexes
[URES_INDEX_ATTRIBUTES
] |= URES_ATT_USES_POOL_BUNDLE
;
1057 indexes
[URES_INDEX_POOL_CHECKSUM
] = fUsePoolBundle
->fChecksum
;
1060 // formatVersion 3 (ICU 56):
1061 // share string values via pool bundle strings
1062 indexes
[URES_INDEX_LENGTH
] |= fPoolStringIndexLimit
<< 8; // bits 23..0 -> 31..8
1063 indexes
[URES_INDEX_ATTRIBUTES
] |= (fPoolStringIndexLimit
>> 12) & 0xf000; // bits 27..24 -> 15..12
1064 indexes
[URES_INDEX_ATTRIBUTES
] |= fPoolStringIndex16Limit
<< 16;
1066 /* write the indexes[] */
1067 udata_writeBlock(mem
, indexes
, fIndexLength
*4);
1069 /* write the table key strings */
1070 udata_writeBlock(mem
, fKeys
+fKeysBottom
,
1071 fKeysTop
-fKeysBottom
);
1073 /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
1074 udata_writeBlock(mem
, f16BitUnits
.getBuffer(), f16BitUnits
.length()*2);
1076 /* write all of the bundle contents: the root item and its children */
1077 byteOffset
= fKeysTop
+ f16BitUnits
.length() * 2;
1078 fRoot
->write(mem
, &byteOffset
);
1079 assert(byteOffset
== top
);
1081 size
= udata_finish(mem
, &errorCode
);
1083 fprintf(stderr
, "genrb error: wrote %u bytes but counted %u\n",
1084 (int)size
, (int)top
);
1085 errorCode
= U_INTERNAL_PROGRAM_ERROR
;
1089 /* Opening Functions */
1091 TableResource
* table_open(struct SRBRoot
*bundle
, const char *tag
, const struct UString
* comment
, UErrorCode
*status
) {
1092 LocalPointer
<TableResource
> res(new TableResource(bundle
, tag
, comment
, *status
), *status
);
1093 return U_SUCCESS(*status
) ? res
.orphan() : NULL
;
1096 ArrayResource
* array_open(struct SRBRoot
*bundle
, const char *tag
, const struct UString
* comment
, UErrorCode
*status
) {
1097 LocalPointer
<ArrayResource
> res(new ArrayResource(bundle
, tag
, comment
, *status
), *status
);
1098 return U_SUCCESS(*status
) ? res
.orphan() : NULL
;
1101 struct SResource
*string_open(struct SRBRoot
*bundle
, const char *tag
, const UChar
*value
, int32_t len
, const struct UString
* comment
, UErrorCode
*status
) {
1102 LocalPointer
<SResource
> res(
1103 new StringResource(bundle
, tag
, value
, len
, comment
, *status
), *status
);
1104 return U_SUCCESS(*status
) ? res
.orphan() : NULL
;
1107 struct SResource
*alias_open(struct SRBRoot
*bundle
, const char *tag
, UChar
*value
, int32_t len
, const struct UString
* comment
, UErrorCode
*status
) {
1108 LocalPointer
<SResource
> res(
1109 new AliasResource(bundle
, tag
, value
, len
, comment
, *status
), *status
);
1110 return U_SUCCESS(*status
) ? res
.orphan() : NULL
;
1113 IntVectorResource
*intvector_open(struct SRBRoot
*bundle
, const char *tag
, const struct UString
* comment
, UErrorCode
*status
) {
1114 LocalPointer
<IntVectorResource
> res(
1115 new IntVectorResource(bundle
, tag
, comment
, *status
), *status
);
1116 return U_SUCCESS(*status
) ? res
.orphan() : NULL
;
1119 struct SResource
*int_open(struct SRBRoot
*bundle
, const char *tag
, int32_t value
, const struct UString
* comment
, UErrorCode
*status
) {
1120 LocalPointer
<SResource
> res(new IntResource(bundle
, tag
, value
, comment
, *status
), *status
);
1121 return U_SUCCESS(*status
) ? res
.orphan() : NULL
;
1124 struct SResource
*bin_open(struct SRBRoot
*bundle
, const char *tag
, uint32_t length
, uint8_t *data
, const char* fileName
, const struct UString
* comment
, UErrorCode
*status
) {
1125 LocalPointer
<SResource
> res(
1126 new BinaryResource(bundle
, tag
, length
, data
, fileName
, comment
, *status
), *status
);
1127 return U_SUCCESS(*status
) ? res
.orphan() : NULL
;
// Constructs the root of a resource bundle.
// The initializer list resets every member; the body then (for
// gFormatVersion > 1) seeds f16BitUnits with a zero unit, allocates the key
// buffer, creates the root resource, and derives fIndexLength, fKeysBottom,
// fKeysTop and fStringsForm from the format version and pool-bundle settings.
// If fKeys or fRoot is NULL and no other error was reported, errorCode is set
// to U_MEMORY_ALLOCATION_ERROR.
1130 SRBRoot::SRBRoot(const UString
*comment
, UBool isPoolBundle
, UErrorCode
&errorCode
)
1131 : fRoot(NULL
), fLocale(NULL
), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE
),
1132 fStringsForm(STRINGS_UTF16_V1
), fIsPoolBundle(isPoolBundle
),
1133 fKeys(NULL
), fKeyMap(NULL
),
1134 fKeysBottom(0), fKeysTop(0), fKeysCapacity(0),
1135 fKeysCount(0), fLocalKeyLimit(0),
1136 f16BitUnits(), f16BitStringsLength(0),
1137 fUsePoolBundle(&kNoPoolBundle
),
1138 fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
1139 fWritePoolBundle(NULL
) {
1140 if (U_FAILURE(errorCode
)) {
1144 if (gFormatVersion
> 1) {
1145 // f16BitUnits must start with a zero for empty resources.
1146 // We might be able to omit it if there are no empty 16-bit resources.
1147 f16BitUnits
.append((UChar
)0);
1150 fKeys
= (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE
);
// NOTE(review): the two fRoot assignments below are presumably alternative
// branches (pseudo-list root for a pool bundle, table root otherwise); the
// selecting condition is not visible in this view -- confirm.
1152 fRoot
= new PseudoListResource(this, errorCode
);
1154 fRoot
= new TableResource(this, NULL
, comment
, errorCode
);
// Any allocation failure or constructor error is reported through errorCode.
1156 if (fKeys
== NULL
|| fRoot
== NULL
|| U_FAILURE(errorCode
)) {
1157 if (U_SUCCESS(errorCode
)) {
1158 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1163 fKeysCapacity
= KEY_SPACE_SIZE
;
1164 /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
1165 if (gUsePoolBundle
|| isPoolBundle
) {
1166 fIndexLength
= URES_INDEX_POOL_CHECKSUM
+ 1;
1167 } else if (gFormatVersion
>= 2) {
1168 fIndexLength
= URES_INDEX_16BIT_TOP
+ 1;
1169 } else /* formatVersion 1 */ {
1170 fIndexLength
= URES_INDEX_ATTRIBUTES
+ 1;
// Reserve (and zero) room for the root word plus fIndexLength index words,
// 4 bytes each, at the start of the key buffer; keys are appended after it.
1172 fKeysBottom
= (1 /* root */ + fIndexLength
) * 4;
1173 uprv_memset(fKeys
, 0, fKeysBottom
);
1174 fKeysTop
= fKeysBottom
;
// Format version 1 keeps the V1 string form; the other assignment selects V2.
1176 if (gFormatVersion
== 1) {
1177 fStringsForm
= STRINGS_UTF16_V1
;
1179 fStringsForm
= STRINGS_UTF16_V2
;
1183 /* Closing Functions */
// Closes the given resource.
// NOTE(review): the body is not visible in this view; presumably it destroys
// `res` -- confirm.
1185 void res_close(struct SResource
*res
) {
// Destructor.
// NOTE(review): the body is not visible in this view; presumably it releases
// the members allocated by the constructor, setLocale() and compactKeys() -- confirm.
1189 SRBRoot::~SRBRoot() {
1196 /* Misc Functions */
1198 void SRBRoot::setLocale(UChar
*locale
, UErrorCode
&errorCode
) {
1199 if(U_FAILURE(errorCode
)) {
1204 fLocale
= (char*) uprv_malloc(sizeof(char) * (u_strlen(locale
)+1));
1205 if(fLocale
== NULL
) {
1206 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1210 u_UCharsToChars(locale
, fLocale
, u_strlen(locale
)+1);
// Maps a key offset to a pointer to its key string.
// The visible return resolves the offset, with its top bit masked off,
// against the key buffer of the pool bundle in use (fUsePoolBundle).
// NOTE(review): presumably that path is taken only for negative (pool bundle)
// offsets and other offsets are resolved against this bundle's own fKeys;
// the condition and the other branch are not visible in this view -- confirm.
1214 SRBRoot::getKeyString(int32_t key
) const {
1216 return fUsePoolBundle
->fKeys
+ (key
& 0x7fffffff);
// Returns this resource's key string by looking up fKey in the given bundle.
// NOTE(review): lines between the signature and the return are not visible in
// this view; presumably they handle a resource without a key -- confirm.
1223 SResource::getKeyString(const SRBRoot
*bundle
) const {
1227 return bundle
->getKeyString(fKey
);
// Returns the start of this bundle's own key bytes (fKeys + fKeysBottom, i.e.
// after the area reserved at the start of the buffer) and stores their total
// length (fKeysTop - fKeysBottom) in *pLength. pLength must not be NULL.
1231 SRBRoot::getKeyBytes(int32_t *pLength
) const {
1232 *pLength
= fKeysTop
- fKeysBottom
;
1233 return fKeys
+ fKeysBottom
;
1237 SRBRoot::addKeyBytes(const char *keyBytes
, int32_t length
, UErrorCode
&errorCode
) {
1240 // It is not legal to add new key bytes after compactKeys is run!
1241 U_ASSERT(fKeyMap
== nullptr);
1243 if (U_FAILURE(errorCode
)) {
1246 if (length
< 0 || (keyBytes
== NULL
&& length
!= 0)) {
1247 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
1256 if (fKeysTop
>= fKeysCapacity
) {
1257 /* overflow - resize the keys buffer */
1258 fKeysCapacity
+= KEY_SPACE_SIZE
;
1259 fKeys
= static_cast<char *>(uprv_realloc(fKeys
, fKeysCapacity
));
1261 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1266 uprv_memcpy(fKeys
+ keypos
, keyBytes
, length
);
// Adds a key (tag) string, including its terminating NUL, to the key buffer
// via addKeyBytes() and stores the resulting offset in keypos.
// NOTE(review): several lines are not visible in this view -- the handling of
// a tag without a key (see the "no error" comment), what is done when
// errorCode is still a success after addKeyBytes(), and the return value;
// confirm before relying on details.
1272 SRBRoot::addTag(const char *tag
, UErrorCode
&errorCode
) {
1275 if (U_FAILURE(errorCode
)) {
1280 /* no error: the root table and array items have no keys */
1284 keypos
= addKeyBytes(tag
, (int32_t)(uprv_strlen(tag
) + 1), errorCode
);
1285 if (U_SUCCESS(errorCode
)) {
// Three-way comparison of two int32_t key offsets, used by the key comparators
// in this file. It compares explicitly instead of subtracting (see the comment
// text below).
// NOTE(review): only the `lPos > rPos` branch is visible in this view; the
// returned values are not shown -- confirm.
1292 compareInt32(int32_t lPos
, int32_t rPos
) {
1294 * Compare possibly-negative key offsets. Don't just return lPos - rPos
1295 * because that is prone to negative-integer underflows.
1299 } else if (lPos
> rPos
) {
// uprv_sortArray() comparator for KeyMapEntry items (used by compactKeys()).
// context is the SRBRoot whose getKeyString() resolves each entry's oldpos.
// Keys are compared from their last character backwards; per the comments
// below, equal suffixes are ordered by descending key length, and remaining
// ties by compareInt32() on the old offsets.
// NOTE(review): the declaration of `diff` and the checks that return it are
// not visible in this view.
1306 static int32_t U_CALLCONV
1307 compareKeySuffixes(const void *context
, const void *l
, const void *r
) {
1308 const struct SRBRoot
*bundle
=(const struct SRBRoot
*)context
;
1309 int32_t lPos
= ((const KeyMapEntry
*)l
)->oldpos
;
1310 int32_t rPos
= ((const KeyMapEntry
*)r
)->oldpos
;
1311 const char *lStart
= bundle
->getKeyString(lPos
);
1312 const char *lLimit
= lStart
;
1313 const char *rStart
= bundle
->getKeyString(rPos
);
1314 const char *rLimit
= rStart
;
// Advance both limits to the terminating NULs.
1316 while (*lLimit
!= 0) { ++lLimit
; }
1317 while (*rLimit
!= 0) { ++rLimit
; }
1318 /* compare keys in reverse character order */
1319 while (lStart
< lLimit
&& rStart
< rLimit
) {
1320 diff
= (int32_t)(uint8_t)*--lLimit
- (int32_t)(uint8_t)*--rLimit
;
1325 /* sort equal suffixes by descending key length */
1326 diff
= (int32_t)(rLimit
- rStart
) - (int32_t)(lLimit
- lStart
);
1330 /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
1331 return compareInt32(lPos
, rPos
);
1334 static int32_t U_CALLCONV
1335 compareKeyNewpos(const void * /*context*/, const void *l
, const void *r
) {
1336 return compareInt32(((const KeyMapEntry
*)l
)->newpos
, ((const KeyMapEntry
*)r
)->newpos
);
1339 static int32_t U_CALLCONV
1340 compareKeyOldpos(const void * /*context*/, const void *l
, const void *r
) {
1341 return compareInt32(((const KeyMapEntry
*)l
)->oldpos
, ((const KeyMapEntry
*)r
)->oldpos
);
// Base implementation of key collection: takes a callback that receives key
// offsets (compactKeys() uses it to gather the keys in use).
// NOTE(review): the body is not visible in this view; presumably it reports
// this resource's own key to `collector` -- confirm.
1344 void SResource::collectKeys(std::function
<void(int32_t)> collector
) const {
// Key collection for containers: walks the child list (fFirst, linked through
// fNext) and lets every child report its keys to `collector`.
// NOTE(review): a line before the loop is not visible in this view; presumably
// it reports the container's own key -- confirm.
1348 void ContainerResource::collectKeys(std::function
<void(int32_t)> collector
) const {
1350 for (SResource
* curr
= fFirst
; curr
!= NULL
; curr
= curr
->fNext
) {
1351 curr
->collectKeys(collector
);
// Compacts this bundle's key bytes. Visible steps:
//  1. For non-pool bundles, collect the key offsets actually used by the
//     resource tree and set fKeysCount to their number.
//  2. Build a KeyMapEntry array with one entry per pool-bundle key (oldpos has
//     the top bit set, i.e. is negative) followed by one entry per local key;
//     unused local keys are overwritten with byte value 1 ("deleted").
//  3. Sort so that each key is followed by its suffixes, point suffix keys
//     into the longer key that contains them and mark their bytes deleted.
//  4. Sort by newpos and squeeze the deleted bytes out of the key buffer in
//     place, rewriting each newpos.
//  5. Sort by oldpos so the map can be binary-searched.
// Does nothing when errorCode is already a failure or fKeyMap is already set;
// sets U_MEMORY_ALLOCATION_ERROR when the map cannot be allocated.
// NOTE(review): declarations, several branch/loop bodies and the final
// bookkeeping are not visible in this view.
1356 SRBRoot::compactKeys(UErrorCode
&errorCode
) {
1361 // Except for pool bundles, keys might not be used.
1362 // Do not add unused keys to the final bundle.
1363 std::set
<int32_t> keysInUse
;
1364 if (!fIsPoolBundle
) {
1365 fRoot
->collectKeys([&keysInUse
](int32_t key
) {
1367 keysInUse
.insert(key
);
1370 fKeysCount
= static_cast<int32_t>(keysInUse
.size());
// Total number of map entries: pool bundle keys plus this bundle's keys.
1373 int32_t keysCount
= fUsePoolBundle
->fKeysCount
+ fKeysCount
;
1374 if (U_FAILURE(errorCode
) || fKeyMap
!= NULL
) {
1377 map
= (KeyMapEntry
*)uprv_malloc(keysCount
* sizeof(KeyMapEntry
));
1379 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
// First the pool bundle's keys: walk its NUL-terminated key strings in order.
1382 keys
= (char *)fUsePoolBundle
->fKeys
;
1383 for (i
= 0; i
< fUsePoolBundle
->fKeysCount
; ++i
) {
1385 (int32_t)(keys
- fUsePoolBundle
->fKeys
) | 0x80000000; /* negative oldpos */
1387 while (*keys
!= 0) { ++keys
; } /* skip the key */
1388 ++keys
; /* skip the NUL */
// Then this bundle's own keys, starting after the reserved area.
1390 keys
= fKeys
+ fKeysBottom
;
1391 while (i
< keysCount
) {
1392 int32_t keyOffset
= static_cast<int32_t>(keys
- fKeys
);
1393 if (!fIsPoolBundle
&& keysInUse
.count(keyOffset
) == 0) {
1394 // Mark the unused key as deleted
1395 while (*keys
!= 0) { *keys
++ = 1; }
1398 map
[i
].oldpos
= keyOffset
;
1400 while (*keys
!= 0) { ++keys
; } /* skip the key */
1401 ++keys
; /* skip the NUL */
1405 if (keys
!= fKeys
+ fKeysTop
) {
1406 // Throw away any unused keys from the end
1407 fKeysTop
= static_cast<int32_t>(keys
- fKeys
);
1409 /* Sort the keys so that each one is immediately followed by all of its suffixes. */
1410 uprv_sortArray(map
, keysCount
, (int32_t)sizeof(KeyMapEntry
),
1411 compareKeySuffixes
, this, FALSE
, &errorCode
);
1413 * Make suffixes point into earlier, longer strings that contain them
1414 * and mark the old, now unused suffix bytes as deleted.
1416 if (U_SUCCESS(errorCode
)) {
1418 for (i
= 0; i
< keysCount
;) {
1420 * This key is not a suffix of the previous one;
1421 * keep this one and delete the following ones that are
1422 * suffixes of this one.
1425 const char *keyLimit
;
1427 map
[i
].newpos
= map
[i
].oldpos
;
1428 if (j
< keysCount
&& map
[j
].oldpos
< 0) {
1429 /* Key string from the pool bundle, do not delete. */
1433 key
= getKeyString(map
[i
].oldpos
);
1434 for (keyLimit
= key
; *keyLimit
!= 0; ++keyLimit
) {}
1435 for (; j
< keysCount
&& map
[j
].oldpos
>= 0; ++j
) {
1438 const char *suffixLimit
;
1440 suffix
= keys
+ map
[j
].oldpos
;
1441 for (suffixLimit
= suffix
; *suffixLimit
!= 0; ++suffixLimit
) {}
1442 offset
= static_cast<int32_t>((keyLimit
- key
) - (suffixLimit
- suffix
));
1444 break; /* suffix cannot be longer than the original */
1446 /* Is it a suffix of the earlier, longer key? */
1447 for (k
= keyLimit
; suffix
< suffixLimit
&& *--k
== *--suffixLimit
;) {}
1448 if (suffix
== suffixLimit
&& *k
== *suffixLimit
) {
1449 map
[j
].newpos
= map
[i
].oldpos
+ offset
; /* yes, point to the earlier key */
1450 // Mark the suffix as deleted
1451 while (*suffix
!= 0) { *suffix
++ = 1; }
1454 break; /* not a suffix, restart from here */
1460 * Re-sort by newpos, then modify the key characters array in-place
1461 * to squeeze out unused bytes, and readjust the newpos offsets.
1463 uprv_sortArray(map
, keysCount
, (int32_t)sizeof(KeyMapEntry
),
1464 compareKeyNewpos
, NULL
, FALSE
, &errorCode
);
1465 if (U_SUCCESS(errorCode
)) {
1466 int32_t oldpos
, newpos
, limit
;
1467 oldpos
= newpos
= fKeysBottom
;
1469 /* skip key offsets that point into the pool bundle rather than this new bundle */
1470 for (i
= 0; i
< keysCount
&& map
[i
].newpos
< 0; ++i
) {}
1471 if (i
< keysCount
) {
1472 while (oldpos
< limit
) {
1473 if (keys
[oldpos
] == 1) {
1474 ++oldpos
; /* skip unused bytes */
1476 /* adjust the new offsets for keys starting here */
1477 while (i
< keysCount
&& map
[i
].newpos
== oldpos
) {
1478 map
[i
++].newpos
= newpos
;
1480 /* move the key characters to their new position */
1481 keys
[newpos
++] = keys
[oldpos
++];
1484 U_ASSERT(i
== keysCount
);
1487 /* Re-sort once more, by old offsets for binary searching. */
1488 uprv_sortArray(map
, keysCount
, (int32_t)sizeof(KeyMapEntry
),
1489 compareKeyOldpos
, NULL
, FALSE
, &errorCode
);
1490 if (U_SUCCESS(errorCode
)) {
1491 /* key size reduction by limit - newpos */
// uprv_sortArray() comparator for an array of StringResource pointers (used by
// compactStringsV2()). Strings are compared from their last code unit
// backwards; when one is a suffix of the other, the longer string sorts first
// (descending length), so each string is followed by its suffixes.
// The context argument is unused.
// NOTE(review): the declaration of `diff` and the check that returns it inside
// the loop are not visible in this view.
1500 static int32_t U_CALLCONV
1501 compareStringSuffixes(const void * /*context*/, const void *l
, const void *r
) {
1502 const StringResource
*left
= *((const StringResource
**)l
);
1503 const StringResource
*right
= *((const StringResource
**)r
);
1504 const UChar
*lStart
= left
->getBuffer();
1505 const UChar
*lLimit
= lStart
+ left
->length();
1506 const UChar
*rStart
= right
->getBuffer();
1507 const UChar
*rLimit
= rStart
+ right
->length();
1509 /* compare keys in reverse character order */
1510 while (lStart
< lLimit
&& rStart
< rLimit
) {
1511 diff
= (int32_t)*--lLimit
- (int32_t)*--rLimit
;
1516 /* sort equal suffixes by descending string length */
1517 return right
->length() - left
->length();
// uprv_sortArray() comparator for an array of StringResource pointers (used by
// compactStringsV2() for its second sort). Ordering criteria, in the order
// they are computed below: non-suffix strings (fSame == NULL) before suffix
// strings, then ascending length, then descending fNumUnitsSaved, and finally
// the result of comparing the string contents. The context argument is unused.
// NOTE(review): the declaration of `diff` and the checks that return it
// between the stages are not visible in this view.
1520 static int32_t U_CALLCONV
1521 compareStringLengths(const void * /*context*/, const void *l
, const void *r
) {
1522 const StringResource
*left
= *((const StringResource
**)l
);
1523 const StringResource
*right
= *((const StringResource
**)r
);
1525 /* Make "is suffix of another string" compare greater than a non-suffix. */
1526 diff
= (int)(left
->fSame
!= NULL
) - (int)(right
->fSame
!= NULL
);
1530 /* sort by ascending string length */
1531 diff
= left
->length() - right
->length();
1535 // sort by descending size reduction
1536 diff
= right
->fNumUnitsSaved
- left
->fNumUnitsSaved
;
1541 return left
->fString
.compare(right
->fString
);
// Appends this string in the URES_STRING_V2 form to dest and records its
// resource word in fRes (type URES_STRING_V2, offset base + the length of dest
// before the append).
// Depending on fNumCharsForLength, one to three length units are written
// first (0xdc00 + len; 0xdfef + (len >> 16) then the low 16 bits; or 0xdfff
// then the high and low 16 bits), followed by the string contents and a
// terminating zero unit.
// NOTE(review): the case labels of the switch are not visible in this view, so
// which fNumCharsForLength value selects which encoding is not shown here.
1545 StringResource::writeUTF16v2(int32_t base
, UnicodeString
&dest
) {
1546 int32_t len
= length();
1547 fRes
= URES_MAKE_RESOURCE(URES_STRING_V2
, base
+ dest
.length());
1549 switch(fNumCharsForLength
) {
1553 dest
.append((UChar
)(0xdc00 + len
));
1556 dest
.append((UChar
)(0xdfef + (len
>> 16)));
1557 dest
.append((UChar
)len
);
1560 dest
.append((UChar
)0xdfff);
1561 dest
.append((UChar
)(len
>> 16));
1562 dest
.append((UChar
)len
);
1565 break; /* will not occur */
1567 dest
.append(fString
);
1568 dest
.append((UChar
)0);
// Writes the strings of stringSet into f16BitUnits in the compact V2 form,
// sharing storage between a string and the strings that are its suffixes.
// Visible steps:
//  1. Copy the StringResource pointers from the hash set into an array and
//     sort it so that each string is followed by its suffixes.
//  2. For each non-suffix string compute fNumUnitsSaved and link suffix
//     strings that need no explicit length (fNumCharsForLength == 0) to it via
//     fSame/fSuffixOffset; suffixes of already-written pool strings get their
//     resource word immediately.
//  3. Re-sort by length (suffix strings last) to favour 16-bit offsets.
//  4. Pool bundle: write only strings that save at least
//     GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING units and stay below
//     RES_MAX_OFFSET, and optionally print statistics.
//     Otherwise: write the non-suffix strings, optionally add them to
//     fWritePoolBundle's root list, then resolve the suffix strings.
// errorCode is set to U_MEMORY_ALLOCATION_ERROR when f16BitUnits becomes bogus
// or a pool string cannot be allocated.
// NOTE(review): several declarations, else-branches and early returns are not
// visible in this view.
1572 SRBRoot::compactStringsV2(UHashtable
*stringSet
, UErrorCode
&errorCode
) {
1573 if (U_FAILURE(errorCode
)) {
1576 // Store the StringResource pointers in an array for
1577 // easy sorting and processing.
1578 // We enumerate a set of strings, so there are no duplicates.
1579 int32_t count
= uhash_count(stringSet
);
1580 LocalArray
<StringResource
*> array(new StringResource
*[count
], errorCode
);
1581 if (U_FAILURE(errorCode
)) {
1584 for (int32_t pos
= UHASH_FIRST
, i
= 0; i
< count
; ++i
) {
1585 array
[i
] = (StringResource
*)uhash_nextElement(stringSet
, &pos
)->key
.pointer
;
1587 /* Sort the strings so that each one is immediately followed by all of its suffixes. */
1588 uprv_sortArray(array
.getAlias(), count
, (int32_t)sizeof(struct SResource
**),
1589 compareStringSuffixes
, NULL
, FALSE
, &errorCode
);
1590 if (U_FAILURE(errorCode
)) {
1594 * Make suffixes point into earlier, longer strings that contain them.
1595 * Temporarily use fSame and fSuffixOffset for suffix strings to
1596 * refer to the remaining ones.
1598 for (int32_t i
= 0; i
< count
;) {
1600 * This string is not a suffix of the previous one;
1601 * write this one and subsume the following ones that are
1602 * suffixes of this one.
1604 StringResource
*res
= array
[i
];
// Units saved by storing this string once instead of once per copy.
1605 res
->fNumUnitsSaved
= (res
->fNumCopies
- 1) * res
->get16BitStringsLength();
1606 // Whole duplicates of pool strings are already account for in fPoolStringIndexLimit,
1607 // see StringResource::handlePreflightStrings().
1609 for (j
= i
+ 1; j
< count
; ++j
) {
1610 StringResource
*suffixRes
= array
[j
];
1611 /* Is it a suffix of the earlier, longer string? */
1612 if (res
->fString
.endsWith(suffixRes
->fString
)) {
1613 assert(res
->length() != suffixRes
->length()); // Set strings are unique.
1614 if (suffixRes
->fWritten
) {
1615 // Pool string, skip.
1616 } else if (suffixRes
->fNumCharsForLength
== 0) {
1617 /* yes, point to the earlier string */
1618 suffixRes
->fSame
= res
;
1619 suffixRes
->fSuffixOffset
= res
->length() - suffixRes
->length();
1620 if (res
->fWritten
) {
1621 // Suffix-share res which is a pool string.
1622 // Compute the resource word and collect the maximum.
1624 res
->fRes
+ res
->fNumCharsForLength
+ suffixRes
->fSuffixOffset
;
1625 int32_t poolStringIndex
= (int32_t)RES_GET_OFFSET(suffixRes
->fRes
);
1626 if (poolStringIndex
>= fPoolStringIndexLimit
) {
1627 fPoolStringIndexLimit
= poolStringIndex
+ 1;
1629 suffixRes
->fWritten
= TRUE
;
1631 res
->fNumUnitsSaved
+= suffixRes
->fNumCopies
* suffixRes
->get16BitStringsLength();
1633 /* write the suffix by itself if we need explicit length */
1636 break; /* not a suffix, restart from here */
1642 * Re-sort the strings by ascending length (except suffixes last)
1643 * to optimize for URES_TABLE16 and URES_ARRAY16:
1644 * Keep as many as possible within reach of 16-bit offsets.
1646 uprv_sortArray(array
.getAlias(), count
, (int32_t)sizeof(struct SResource
**),
1647 compareStringLengths
, NULL
, FALSE
, &errorCode
);
1648 if (U_FAILURE(errorCode
)) {
1651 if (fIsPoolBundle
) {
1652 // Write strings that are sufficiently shared.
1653 // Avoid writing other strings.
1654 int32_t numStringsWritten
= 0;
1655 int32_t numUnitsSaved
= 0;
1656 int32_t numUnitsNotSaved
= 0;
1657 for (int32_t i
= 0; i
< count
; ++i
) {
1658 StringResource
*res
= array
[i
];
1659 // Maximum pool string index when suffix-sharing the last character.
1660 int32_t maxStringIndex
=
1661 f16BitUnits
.length() + res
->fNumCharsForLength
+ res
->length() - 1;
1662 if (res
->fNumUnitsSaved
>= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
&&
1663 maxStringIndex
< RES_MAX_OFFSET
) {
1664 res
->writeUTF16v2(0, f16BitUnits
);
1665 ++numStringsWritten
;
1666 numUnitsSaved
+= res
->fNumUnitsSaved
;
1668 numUnitsNotSaved
+= res
->fNumUnitsSaved
;
1669 res
->fRes
= URES_MAKE_EMPTY_RESOURCE(URES_STRING
);
1670 res
->fWritten
= TRUE
;
1673 if (f16BitUnits
.isBogus()) {
1674 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1676 if (getShowWarning()) { // not quiet
1677 printf("number of shared strings: %d\n", (int)numStringsWritten
);
1678 printf("16-bit units for strings: %6d = %6d bytes\n",
1679 (int)f16BitUnits
.length(), (int)f16BitUnits
.length() * 2);
1680 printf("16-bit units saved: %6d = %6d bytes\n",
1681 (int)numUnitsSaved
, (int)numUnitsSaved
* 2);
1682 printf("16-bit units not saved: %6d = %6d bytes\n",
1683 (int)numUnitsNotSaved
, (int)numUnitsNotSaved
* 2);
1686 assert(fPoolStringIndexLimit
<= fUsePoolBundle
->fStringIndexLimit
);
1687 /* Write the non-suffix strings. */
1689 for (i
= 0; i
< count
&& array
[i
]->fSame
== NULL
; ++i
) {
1690 StringResource
*res
= array
[i
];
1691 if (!res
->fWritten
) {
// Track the highest local string index; it is the offset where this string starts.
1692 int32_t localStringIndex
= f16BitUnits
.length();
1693 if (localStringIndex
>= fLocalStringIndexLimit
) {
1694 fLocalStringIndexLimit
= localStringIndex
+ 1;
1696 res
->writeUTF16v2(fPoolStringIndexLimit
, f16BitUnits
);
1699 if (f16BitUnits
.isBogus()) {
1700 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1703 if (fWritePoolBundle
!= NULL
&& gFormatVersion
>= 3) {
1704 PseudoListResource
*poolStrings
=
1705 static_cast<PseudoListResource
*>(fWritePoolBundle
->fRoot
);
1706 for (i
= 0; i
< count
&& array
[i
]->fSame
== NULL
; ++i
) {
1707 assert(!array
[i
]->fString
.isEmpty());
1708 StringResource
*poolString
=
1709 new StringResource(fWritePoolBundle
, array
[i
]->fString
, errorCode
);
1710 if (poolString
== NULL
) {
1711 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1714 poolStrings
->add(poolString
);
1717 /* Write the suffix strings. Make each point to the real string. */
1718 for (; i
< count
; ++i
) {
1719 StringResource
*res
= array
[i
];
1720 if (res
->fWritten
) {
1723 StringResource
*same
= res
->fSame
;
1724 assert(res
->length() != same
->length()); // Set strings are unique.
// The suffix starts fSuffixOffset units into the containing string, after
// that string's length units.
1725 res
->fRes
= same
->fRes
+ same
->fNumCharsForLength
+ res
->fSuffixOffset
;
1726 int32_t localStringIndex
= (int32_t)RES_GET_OFFSET(res
->fRes
) - fPoolStringIndexLimit
;
1727 // Suffixes of pool strings have been set already.
1728 assert(localStringIndex
>= 0);
1729 if (localStringIndex
>= fLocalStringIndexLimit
) {
1730 fLocalStringIndexLimit
= localStringIndex
+ 1;
1732 res
->fWritten
= TRUE
;
1735 // +1 to account for the initial zero in f16BitUnits
1736 assert(f16BitUnits
.length() <= (f16BitStringsLength
+ 1));
1739 void SResource::applyFilter(
1740 const PathFilter
& /*filter*/,
1741 ResKeyPath
& /*path*/,
1742 const SRBRoot
* /*bundle*/) {
1743 // Only a few resource types (tables) are capable of being filtered.
1746 void TableResource::applyFilter(
1747 const PathFilter
& filter
,
1749 const SRBRoot
* bundle
) {
1750 SResource
* prev
= nullptr;
1751 SResource
* curr
= fFirst
;
1752 for (; curr
!= nullptr;) {
1753 path
.push(curr
->getKeyString(bundle
));
1754 auto inclusion
= filter
.match(path
);
1755 if (inclusion
== PathFilter::EInclusion::INCLUDE
) {
1756 // Include whole subtree
1759 std::cout
<< "genrb subtree: " << bundle
->fLocale
<< ": INCLUDE: " << path
<< std::endl
;
1761 } else if (inclusion
== PathFilter::EInclusion::EXCLUDE
) {
1762 // Reject the whole subtree
1763 // Remove it from the linked list
1765 std::cout
<< "genrb subtree: " << bundle
->fLocale
<< ": DELETE: " << path
<< std::endl
;
1767 if (prev
== nullptr) {
1768 fFirst
= curr
->fNext
;
1770 prev
->fNext
= curr
->fNext
;
1776 U_ASSERT(inclusion
== PathFilter::EInclusion::PARTIAL
);
1777 // Recurse into the child
1778 curr
->applyFilter(filter
, path
, bundle
);
1783 if (curr
== nullptr) {