]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/reslist.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / reslist.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
2ca993e8
A
3/*
4*******************************************************************************
5*
6* Copyright (C) 2000-2015, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10*
11* File reslist.cpp
12*
13* Modification History:
14*
15* Date Name Description
16* 02/21/00 weiv Creation.
17*******************************************************************************
18*/
19
20// Safer use of UnicodeString.
21#ifndef UNISTR_FROM_CHAR_EXPLICIT
22# define UNISTR_FROM_CHAR_EXPLICIT explicit
23#endif
24
25// Less important, but still a good idea.
26#ifndef UNISTR_FROM_STRING_EXPLICIT
27# define UNISTR_FROM_STRING_EXPLICIT explicit
28#endif
29
30#include <assert.h>
3d1f044b
A
31#include <iostream>
32#include <set>
2ca993e8 33#include <stdio.h>
3d1f044b 34
2ca993e8
A
35#include "unicode/localpointer.h"
36#include "reslist.h"
37#include "unewdata.h"
38#include "unicode/ures.h"
39#include "unicode/putil.h"
40#include "errmsg.h"
3d1f044b 41#include "filterrb.h"
2ca993e8
A
42
43#include "uarrsort.h"
44#include "uelement.h"
45#include "uhash.h"
46#include "uinvchar.h"
47#include "ustr_imp.h"
48#include "unicode/utf16.h"
3d1f044b
A
49#include "uassert.h"
50
2ca993e8
A
51/*
52 * Align binary data at a 16-byte offset from the start of the resource bundle,
53 * to be safe for any data type it may contain.
54 */
55#define BIN_ALIGNMENT 16
56
57// This numeric constant must be at least 1.
58// If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
59// and it makes no sense to move it to the pool bundle.
60// The larger the threshold for fNumUnitsSaved
61// the smaller the savings, and the smaller the pool bundle.
62// We trade some total size reduction to reduce the pool bundle a bit,
63// so that one can reasonably save data size by
64// removing bundle files without rebuilding the pool bundle.
65// This can also help to keep the pool and total (pool+local) string indexes
66// within 16 bits, that is, within range of Table16 and Array16 containers.
67#ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
68# define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
69#endif
70
71U_NAMESPACE_USE
72
73static UBool gIncludeCopyright = FALSE;
74static UBool gUsePoolBundle = FALSE;
75static UBool gIsDefaultFormatVersion = TRUE;
76static int32_t gFormatVersion = 3;
77
78/* How do we store string values? */
79enum {
80 STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
81 STRINGS_UTF16_V2 /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
82};
83
84static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40; /* do not store the length explicitly for such strings */
85
86static const ResFile kNoPoolBundle;
87
88/*
89 * res_none() returns the address of kNoResource,
90 * for use in non-error cases when no resource is to be added to the bundle.
91 * (NULL is used in error cases.)
92 */
93static SResource kNoResource; // TODO: const
94
95static UDataInfo dataInfo= {
96 sizeof(UDataInfo),
97 0,
98
99 U_IS_BIG_ENDIAN,
100 U_CHARSET_FAMILY,
101 sizeof(UChar),
102 0,
103
104 {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */
105 {1, 3, 0, 0}, /* formatVersion */
106 {1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/
107};
108
109static const UVersionInfo gFormatVersions[4] = { /* indexed by a major-formatVersion integer */
110 { 0, 0, 0, 0 },
111 { 1, 3, 0, 0 },
112 { 2, 0, 0, 0 },
113 { 3, 0, 0, 0 }
114};
115// Remember to update genrb.h GENRB_VERSION when changing the data format.
116// (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
117
/**
 * Computes how many padding bytes are needed to round size up to the next
 * multiple of sizeof(uint32_t).
 *
 * @param size a byte count or byte offset
 * @return 0 if size is already a multiple of sizeof(uint32_t),
 *         otherwise the number of bytes missing to the next multiple
 */
static uint8_t calcPadding(uint32_t size) {
    const uint32_t remainder = size % sizeof(uint32_t);
    if (remainder == 0) {
        return 0;
    }
    return static_cast<uint8_t>(sizeof(uint32_t) - remainder);
}
123
124void setIncludeCopyright(UBool val){
125 gIncludeCopyright=val;
126}
127
128UBool getIncludeCopyright(void){
129 return gIncludeCopyright;
130}
131
132void setFormatVersion(int32_t formatVersion) {
133 gIsDefaultFormatVersion = FALSE;
134 gFormatVersion = formatVersion;
135}
136
137int32_t getFormatVersion() {
138 return gFormatVersion;
139}
140
141void setUsePoolBundle(UBool use) {
142 gUsePoolBundle = use;
143}
144
145// TODO: return const pointer, or find another way to express "none"
146struct SResource* res_none() {
147 return &kNoResource;
148}
149
150SResource::SResource()
151 : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
152 line(0), fNext(NULL) {
153 ustr_init(&fComment);
154}
155
156SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
157 UErrorCode &errorCode)
158 : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1),
159 fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
160 line(0), fNext(NULL) {
161 ustr_init(&fComment);
162 if(comment != NULL) {
163 ustr_cpy(&fComment, comment, &errorCode);
164 }
165}
166
167SResource::~SResource() {
168 ustr_deinit(&fComment);
169}
170
171ContainerResource::~ContainerResource() {
172 SResource *current = fFirst;
173 while (current != NULL) {
174 SResource *next = current->fNext;
175 delete current;
176 current = next;
177 }
178}
179
180TableResource::~TableResource() {}
181
182// TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
183void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) {
184 if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) {
185 return;
186 }
187
188 /* remember this linenumber to report to the user if there is a duplicate key */
189 res->line = linenumber;
190
191 /* here we need to traverse the list */
192 ++fCount;
193
194 /* is the list still empty? */
195 if (fFirst == NULL) {
196 fFirst = res;
197 res->fNext = NULL;
198 return;
199 }
200
201 const char *resKeyString = fRoot->fKeys + res->fKey;
202
203 SResource *current = fFirst;
204
205 SResource *prev = NULL;
206 while (current != NULL) {
207 const char *currentKeyString = fRoot->fKeys + current->fKey;
208 int diff;
209 /*
210 * formatVersion 1: compare key strings in native-charset order
211 * formatVersion 2 and up: compare key strings in ASCII order
212 */
213 if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
214 diff = uprv_strcmp(currentKeyString, resKeyString);
215 } else {
216 diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
217 }
218 if (diff < 0) {
219 prev = current;
220 current = current->fNext;
221 } else if (diff > 0) {
222 /* we're either in front of the list, or in the middle */
223 if (prev == NULL) {
224 /* front of the list */
225 fFirst = res;
226 } else {
227 /* middle of the list */
228 prev->fNext = res;
229 }
230
231 res->fNext = current;
232 return;
233 } else {
234 /* Key already exists! ERROR! */
235 error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
236 errorCode = U_UNSUPPORTED_ERROR;
237 return;
238 }
239 }
240
241 /* end of list */
242 prev->fNext = res;
243 res->fNext = NULL;
244}
245
246ArrayResource::~ArrayResource() {}
247
248void ArrayResource::add(SResource *res) {
249 if (res != NULL && res != &kNoResource) {
250 if (fFirst == NULL) {
251 fFirst = res;
252 } else {
253 fLast->fNext = res;
254 }
255 fLast = res;
256 ++fCount;
257 }
258}
259
260PseudoListResource::~PseudoListResource() {}
261
262void PseudoListResource::add(SResource *res) {
263 if (res != NULL && res != &kNoResource) {
264 res->fNext = fFirst;
265 fFirst = res;
266 ++fCount;
267 }
268}
269
270StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
271 const UChar *value, int32_t len,
272 const UString* comment, UErrorCode &errorCode)
273 : SResource(bundle, tag, type, comment, errorCode) {
274 if (len == 0 && gFormatVersion > 1) {
275 fRes = URES_MAKE_EMPTY_RESOURCE(type);
276 fWritten = TRUE;
277 return;
278 }
279
f3c0d7a5 280 fString.setTo(ConstChar16Ptr(value), len);
2ca993e8
A
281 fString.getTerminatedBuffer(); // Some code relies on NUL-termination.
282 if (U_SUCCESS(errorCode) && fString.isBogus()) {
283 errorCode = U_MEMORY_ALLOCATION_ERROR;
284 }
285}
286
287StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
288 const icu::UnicodeString &value, UErrorCode &errorCode)
289 : SResource(bundle, NULL, type, NULL, errorCode), fString(value) {
290 if (value.isEmpty() && gFormatVersion > 1) {
291 fRes = URES_MAKE_EMPTY_RESOURCE(type);
292 fWritten = TRUE;
293 return;
294 }
295
296 fString.getTerminatedBuffer(); // Some code relies on NUL-termination.
297 if (U_SUCCESS(errorCode) && fString.isBogus()) {
298 errorCode = U_MEMORY_ALLOCATION_ERROR;
299 }
300}
301
302// Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
303StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len,
304 UErrorCode &errorCode)
305 : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) {
306 assert(len > 0);
307 assert(!fString.isBogus());
308}
309
310StringBaseResource::~StringBaseResource() {}
311
312static int32_t U_CALLCONV
313string_hash(const UElement key) {
314 const StringResource *res = static_cast<const StringResource *>(key.pointer);
315 return res->fString.hashCode();
316}
317
318static UBool U_CALLCONV
319string_comp(const UElement key1, const UElement key2) {
320 const StringResource *res1 = static_cast<const StringResource *>(key1.pointer);
321 const StringResource *res2 = static_cast<const StringResource *>(key2.pointer);
322 return res1->fString == res2->fString;
323}
324
325StringResource::~StringResource() {}
326
327AliasResource::~AliasResource() {}
328
329IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value,
330 const UString* comment, UErrorCode &errorCode)
331 : SResource(bundle, tag, URES_INT, comment, errorCode) {
332 fValue = value;
333 fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
334 fWritten = TRUE;
335}
336
337IntResource::~IntResource() {}
338
339IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag,
340 const UString* comment, UErrorCode &errorCode)
341 : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode),
342 fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) {
343 if (fArray == NULL) {
344 errorCode = U_MEMORY_ALLOCATION_ERROR;
345 return;
346 }
347}
348
349IntVectorResource::~IntVectorResource() {
350 delete[] fArray;
351}
352
353void IntVectorResource::add(int32_t value, UErrorCode &errorCode) {
354 if (U_SUCCESS(errorCode)) {
355 fArray[fCount++] = value;
356 }
357}
358
359BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag,
360 uint32_t length, uint8_t *data, const char* fileName,
361 const UString* comment, UErrorCode &errorCode)
362 : SResource(bundle, tag, URES_BINARY, comment, errorCode),
363 fLength(length), fData(NULL), fFileName(NULL) {
364 if (U_FAILURE(errorCode)) {
365 return;
366 }
367 if (fileName != NULL && *fileName != 0){
368 fFileName = new char[uprv_strlen(fileName)+1];
369 if (fFileName == NULL) {
370 errorCode = U_MEMORY_ALLOCATION_ERROR;
371 return;
372 }
373 uprv_strcpy(fFileName, fileName);
374 }
375 if (length > 0) {
376 fData = new uint8_t[length];
377 if (fData == NULL) {
378 errorCode = U_MEMORY_ALLOCATION_ERROR;
379 return;
380 }
381 uprv_memcpy(fData, data, length);
382 } else {
383 if (gFormatVersion > 1) {
384 fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
385 fWritten = TRUE;
386 }
387 }
388}
389
390BinaryResource::~BinaryResource() {
391 delete[] fData;
392 delete[] fFileName;
393}
394
395/* Writing Functions */
396
397void
398StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
399 UErrorCode &errorCode) {
400 assert(fSame == NULL);
401 fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
402 if (fSame != NULL) {
403 // This is a duplicate of a pool bundle string or of an earlier-visited string.
404 if (++fSame->fNumCopies == 1) {
405 assert(fSame->fWritten);
406 int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
407 if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
408 bundle->fPoolStringIndexLimit = poolStringIndex + 1;
409 }
410 }
411 return;
412 }
413 /* Put this string into the set for finding duplicates. */
414 fNumCopies = 1;
415 uhash_put(stringSet, this, this, &errorCode);
416
417 if (bundle->fStringsForm != STRINGS_UTF16_V1) {
418 int32_t len = length();
419 if (len <= MAX_IMPLICIT_STRING_LENGTH &&
420 !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) {
421 /*
422 * This string will be stored without an explicit length.
423 * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
424 */
425 fNumCharsForLength = 0;
426 } else if (len <= 0x3ee) {
427 fNumCharsForLength = 1;
428 } else if (len <= 0xfffff) {
429 fNumCharsForLength = 2;
430 } else {
431 fNumCharsForLength = 3;
432 }
433 bundle->f16BitStringsLength += fNumCharsForLength + len + 1; /* +1 for the NUL */
434 }
435}
436
437void
438ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
439 UErrorCode &errorCode) {
440 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
441 current->preflightStrings(bundle, stringSet, errorCode);
442 }
443}
444
445void
446SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) {
447 if (U_FAILURE(errorCode)) {
448 return;
449 }
450 if (fRes != RES_BOGUS) {
451 /*
452 * The resource item word was already precomputed, which means
453 * no further data needs to be written.
454 * This might be an integer, or an empty string/binary/etc.
455 */
456 return;
457 }
458 handlePreflightStrings(bundle, stringSet, errorCode);
459}
460
461void
462SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/,
463 UErrorCode & /*errorCode*/) {
464 /* Neither a string nor a container. */
465}
466
467int32_t
468SRBRoot::makeRes16(uint32_t resWord) const {
469 if (resWord == 0) {
470 return 0; /* empty string */
471 }
472 uint32_t type = RES_GET_TYPE(resWord);
473 int32_t offset = (int32_t)RES_GET_OFFSET(resWord);
474 if (type == URES_STRING_V2) {
475 assert(offset > 0);
476 if (offset < fPoolStringIndexLimit) {
477 if (offset < fPoolStringIndex16Limit) {
478 return offset;
479 }
480 } else {
481 offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;
482 if (offset <= 0xffff) {
483 return offset;
484 }
485 }
486 }
487 return -1;
488}
489
490int32_t
491SRBRoot::mapKey(int32_t oldpos) const {
492 const KeyMapEntry *map = fKeyMap;
493 if (map == NULL) {
494 return oldpos;
495 }
496 int32_t i, start, limit;
497
498 /* do a binary search for the old, pre-compactKeys() key offset */
499 start = fUsePoolBundle->fKeysCount;
500 limit = start + fKeysCount;
501 while (start < limit - 1) {
502 i = (start + limit) / 2;
503 if (oldpos < map[i].oldpos) {
504 limit = i;
505 } else {
506 start = i;
507 }
508 }
509 assert(oldpos == map[start].oldpos);
510 return map[start].newpos;
511}
512
513/*
514 * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
515 * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
516 * and exits early.
517 */
518void
519StringResource::handleWrite16(SRBRoot * /*bundle*/) {
520 SResource *same;
521 if ((same = fSame) != NULL) {
522 /* This is a duplicate. */
523 assert(same->fRes != RES_BOGUS && same->fWritten);
524 fRes = same->fRes;
525 fWritten = same->fWritten;
526 }
527}
528
529void
530ContainerResource::writeAllRes16(SRBRoot *bundle) {
531 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
532 bundle->f16BitUnits.append((UChar)current->fRes16);
533 }
534 fWritten = TRUE;
535}
536
537void
538ArrayResource::handleWrite16(SRBRoot *bundle) {
539 if (fCount == 0 && gFormatVersion > 1) {
540 fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
541 fWritten = TRUE;
542 return;
543 }
544
545 int32_t res16 = 0;
546 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
547 current->write16(bundle);
548 res16 |= current->fRes16;
549 }
550 if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
551 fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
552 bundle->f16BitUnits.append((UChar)fCount);
553 writeAllRes16(bundle);
554 }
555}
556
557void
558TableResource::handleWrite16(SRBRoot *bundle) {
559 if (fCount == 0 && gFormatVersion > 1) {
560 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
561 fWritten = TRUE;
562 return;
563 }
564 /* Find the smallest table type that fits the data. */
565 int32_t key16 = 0;
566 int32_t res16 = 0;
567 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
568 current->write16(bundle);
569 key16 |= current->fKey16;
570 res16 |= current->fRes16;
571 }
572 if(fCount > (uint32_t)bundle->fMaxTableLength) {
573 bundle->fMaxTableLength = fCount;
574 }
575 if (fCount <= 0xffff && key16 >= 0) {
576 if (res16 >= 0 && gFormatVersion > 1) {
577 /* 16-bit count, key offsets and values */
578 fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
579 bundle->f16BitUnits.append((UChar)fCount);
580 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
581 bundle->f16BitUnits.append((UChar)current->fKey16);
582 }
583 writeAllRes16(bundle);
584 } else {
585 /* 16-bit count, 16-bit key offsets, 32-bit values */
586 fTableType = URES_TABLE;
587 }
588 } else {
589 /* 32-bit count, key offsets and values */
590 fTableType = URES_TABLE32;
591 }
592}
593
594void
595PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {
596 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
597 fWritten = TRUE;
598}
599
600void
601SResource::write16(SRBRoot *bundle) {
602 if (fKey >= 0) {
603 // A tagged resource has a non-negative key index into the parsed key strings.
604 // compactKeys() built a map from parsed key index to the final key index.
605 // After the mapping, negative key indexes are used for shared pool bundle keys.
606 fKey = bundle->mapKey(fKey);
607 // If the key index fits into a Key16 for a Table or Table16,
608 // then set the fKey16 field accordingly.
609 // Otherwise keep it at -1.
610 if (fKey >= 0) {
611 if (fKey < bundle->fLocalKeyLimit) {
612 fKey16 = fKey;
613 }
614 } else {
615 int32_t poolKeyIndex = fKey & 0x7fffffff;
616 if (poolKeyIndex <= 0xffff) {
617 poolKeyIndex += bundle->fLocalKeyLimit;
618 if (poolKeyIndex <= 0xffff) {
619 fKey16 = poolKeyIndex;
620 }
621 }
622 }
623 }
624 /*
625 * fRes != RES_BOGUS:
626 * The resource item word was already precomputed, which means
627 * no further data needs to be written.
628 * This might be an integer, or an empty or UTF-16 v2 string,
629 * an empty binary, etc.
630 */
631 if (fRes == RES_BOGUS) {
632 handleWrite16(bundle);
633 }
634 // Compute fRes16 for precomputed as well as just-computed fRes.
635 fRes16 = bundle->makeRes16(fRes);
636}
637
638void
639SResource::handleWrite16(SRBRoot * /*bundle*/) {
640 /* Only a few resource types write 16-bit units. */
641}
642
643/*
644 * Only called for UTF-16 v1 strings, and for aliases.
645 * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
646 * and exits early.
647 */
648void
649StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
650 /* Write the UTF-16 v1 string. */
651 fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
652 *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
653}
654
655void
656IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
657 if (fCount == 0 && gFormatVersion > 1) {
658 fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
659 fWritten = TRUE;
660 } else {
661 fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
662 *byteOffset += (1 + fCount) * 4;
663 }
664}
665
666void
667BinaryResource::handlePreWrite(uint32_t *byteOffset) {
668 uint32_t pad = 0;
669 uint32_t dataStart = *byteOffset + sizeof(fLength);
670
671 if (dataStart % BIN_ALIGNMENT) {
672 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
673 *byteOffset += pad; /* pad == 4 or 8 or 12 */
674 }
675 fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
676 *byteOffset += 4 + fLength;
677}
678
679void
680ContainerResource::preWriteAllRes(uint32_t *byteOffset) {
681 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
682 current->preWrite(byteOffset);
683 }
684}
685
686void
687ArrayResource::handlePreWrite(uint32_t *byteOffset) {
688 preWriteAllRes(byteOffset);
689 fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
690 *byteOffset += (1 + fCount) * 4;
691}
692
693void
694TableResource::handlePreWrite(uint32_t *byteOffset) {
695 preWriteAllRes(byteOffset);
696 if (fTableType == URES_TABLE) {
697 /* 16-bit count, 16-bit key offsets, 32-bit values */
698 fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
699 *byteOffset += 2 + fCount * 6;
700 } else {
701 /* 32-bit count, key offsets and values */
702 fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
703 *byteOffset += 4 + fCount * 8;
704 }
705}
706
707void
708SResource::preWrite(uint32_t *byteOffset) {
709 if (fRes != RES_BOGUS) {
710 /*
711 * The resource item word was already precomputed, which means
712 * no further data needs to be written.
713 * This might be an integer, or an empty or UTF-16 v2 string,
714 * an empty binary, etc.
715 */
716 return;
717 }
718 handlePreWrite(byteOffset);
719 *byteOffset += calcPadding(*byteOffset);
720}
721
722void
723SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
724 assert(FALSE);
725}
726
727/*
728 * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
729 * write() sees fWritten and exits early.
730 */
731void
732StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
733 /* Write the UTF-16 v1 string. */
734 int32_t len = length();
735 udata_write32(mem, len);
736 udata_writeUString(mem, getBuffer(), len + 1);
737 *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR;
738 fWritten = TRUE;
739}
740
741void
742ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {
743 uint32_t i = 0;
744 for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
745 current->write(mem, byteOffset);
746 }
747 assert(i == fCount);
748}
749
750void
751ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {
752 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
753 udata_write32(mem, current->fRes);
754 }
755 *byteOffset += fCount * 4;
756}
757
758void
759ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
760 writeAllRes(mem, byteOffset);
761 udata_write32(mem, fCount);
762 *byteOffset += 4;
763 writeAllRes32(mem, byteOffset);
764}
765
766void
767IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
768 udata_write32(mem, fCount);
769 for(uint32_t i = 0; i < fCount; ++i) {
770 udata_write32(mem, fArray[i]);
771 }
772 *byteOffset += (1 + fCount) * 4;
773}
774
775void
776BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
777 uint32_t pad = 0;
778 uint32_t dataStart = *byteOffset + sizeof(fLength);
779
780 if (dataStart % BIN_ALIGNMENT) {
781 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
782 udata_writePadding(mem, pad); /* pad == 4 or 8 or 12 */
783 *byteOffset += pad;
784 }
785
786 udata_write32(mem, fLength);
787 if (fLength > 0) {
788 udata_writeBlock(mem, fData, fLength);
789 }
790 *byteOffset += 4 + fLength;
791}
792
793void
794TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
795 writeAllRes(mem, byteOffset);
796 if(fTableType == URES_TABLE) {
797 udata_write16(mem, (uint16_t)fCount);
798 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
799 udata_write16(mem, current->fKey16);
800 }
801 *byteOffset += (1 + fCount)* 2;
802 if ((fCount & 1) == 0) {
803 /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
804 udata_writePadding(mem, 2);
805 *byteOffset += 2;
806 }
807 } else /* URES_TABLE32 */ {
808 udata_write32(mem, fCount);
809 for (SResource *current = fFirst; current != NULL; current = current->fNext) {
810 udata_write32(mem, (uint32_t)current->fKey);
811 }
812 *byteOffset += (1 + fCount)* 4;
813 }
814 writeAllRes32(mem, byteOffset);
815}
816
817void
818SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
819 if (fWritten) {
820 assert(fRes != RES_BOGUS);
821 return;
822 }
823 handleWrite(mem, byteOffset);
824 uint8_t paddingSize = calcPadding(*byteOffset);
825 if (paddingSize > 0) {
826 udata_writePadding(mem, paddingSize);
827 *byteOffset += paddingSize;
828 }
829 fWritten = TRUE;
830}
831
832void
833SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {
834 assert(FALSE);
835}
836
837void SRBRoot::write(const char *outputDir, const char *outputPkg,
838 char *writtenFilename, int writtenFilenameLen,
839 UErrorCode &errorCode) {
840 UNewDataMemory *mem = NULL;
841 uint32_t byteOffset = 0;
842 uint32_t top, size;
843 char dataName[1024];
844 int32_t indexes[URES_INDEX_TOP];
845
846 compactKeys(errorCode);
847 /*
848 * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
849 * Safe because the capacity is a multiple of 4.
850 */
851 while (fKeysTop & 3) {
852 fKeys[fKeysTop++] = (char)0xaa;
853 }
854 /*
855 * In URES_TABLE, use all local key offsets that fit into 16 bits,
856 * and use the remaining 16-bit offsets for pool key offsets
857 * if there are any.
858 * If there are no local keys, then use the whole 16-bit space
859 * for pool key offsets.
860 * Note: This cannot be changed without changing the major formatVersion.
861 */
862 if (fKeysBottom < fKeysTop) {
863 if (fKeysTop <= 0x10000) {
864 fLocalKeyLimit = fKeysTop;
865 } else {
866 fLocalKeyLimit = 0x10000;
867 }
868 } else {
869 fLocalKeyLimit = 0;
870 }
871
872 UHashtable *stringSet;
873 if (gFormatVersion > 1) {
874 stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
875 if (U_SUCCESS(errorCode) &&
876 fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) {
877 for (SResource *current = fUsePoolBundle->fStrings->fFirst;
878 current != NULL;
879 current = current->fNext) {
880 StringResource *sr = static_cast<StringResource *>(current);
881 sr->fNumCopies = 0;
882 sr->fNumUnitsSaved = 0;
883 uhash_put(stringSet, sr, sr, &errorCode);
884 }
885 }
886 fRoot->preflightStrings(this, stringSet, errorCode);
887 } else {
888 stringSet = NULL;
889 }
890 if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) {
891 compactStringsV2(stringSet, errorCode);
892 }
893 uhash_close(stringSet);
894 if (U_FAILURE(errorCode)) {
895 return;
896 }
897
898 int32_t formatVersion = gFormatVersion;
899 if (fPoolStringIndexLimit != 0) {
900 int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
901 if ((sum - 1) > RES_MAX_OFFSET) {
902 errorCode = U_BUFFER_OVERFLOW_ERROR;
903 return;
904 }
905 if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
906 // 16-bit indexes work for all pool + local strings.
907 fPoolStringIndex16Limit = fPoolStringIndexLimit;
908 } else {
909 // Set the pool index threshold so that 16-bit indexes work
910 // for some pool strings and some local strings.
911 fPoolStringIndex16Limit = (int32_t)(
912 ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
913 }
914 } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) {
915 // If we just default to formatVersion 3
916 // but there are no pool bundle strings to share
917 // and we do not write a pool bundle,
918 // then write formatVersion 2 which is just as good.
919 formatVersion = 2;
920 }
921
922 fRoot->write16(this);
923 if (f16BitUnits.isBogus()) {
924 errorCode = U_MEMORY_ALLOCATION_ERROR;
925 return;
926 }
927 if (f16BitUnits.length() & 1) {
928 f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */
929 }
2ca993e8
A
930
931 byteOffset = fKeysTop + f16BitUnits.length() * 2;
932 fRoot->preWrite(&byteOffset);
933
934 /* total size including the root item */
935 top = byteOffset;
936
937 if (writtenFilename && writtenFilenameLen) {
938 *writtenFilename = 0;
939 }
940
941 if (writtenFilename) {
942 int32_t off = 0, len = 0;
943 if (outputDir) {
944 len = (int32_t)uprv_strlen(outputDir);
945 if (len > writtenFilenameLen) {
946 len = writtenFilenameLen;
947 }
948 uprv_strncpy(writtenFilename, outputDir, len);
949 }
950 if (writtenFilenameLen -= len) {
951 off += len;
952 writtenFilename[off] = U_FILE_SEP_CHAR;
953 if (--writtenFilenameLen) {
954 ++off;
955 if(outputPkg != NULL)
956 {
957 uprv_strcpy(writtenFilename+off, outputPkg);
958 off += (int32_t)uprv_strlen(outputPkg);
959 writtenFilename[off] = '_';
960 ++off;
961 }
962
963 len = (int32_t)uprv_strlen(fLocale);
964 if (len > writtenFilenameLen) {
965 len = writtenFilenameLen;
966 }
967 uprv_strncpy(writtenFilename + off, fLocale, len);
968 if (writtenFilenameLen -= len) {
969 off += len;
970 len = 5;
971 if (len > writtenFilenameLen) {
972 len = writtenFilenameLen;
973 }
974 uprv_strncpy(writtenFilename + off, ".res", len);
975 }
976 }
977 }
978 }
979
980 if(outputPkg)
981 {
982 uprv_strcpy(dataName, outputPkg);
983 uprv_strcat(dataName, "_");
984 uprv_strcat(dataName, fLocale);
985 }
986 else
987 {
988 uprv_strcpy(dataName, fLocale);
989 }
990
991 uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo));
992
993 mem = udata_create(outputDir, "res", dataName,
994 &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode);
995 if(U_FAILURE(errorCode)){
996 return;
997 }
998
999 /* write the root item */
1000 udata_write32(mem, fRoot->fRes);
1001
1002 /*
1003 * formatVersion 1.1 (ICU 2.8):
1004 * write int32_t indexes[] after root and before the key strings
1005 * to make it easier to parse resource bundles in icuswap or from Java etc.
1006 */
1007 uprv_memset(indexes, 0, sizeof(indexes));
1008 indexes[URES_INDEX_LENGTH]= fIndexLength;
1009 indexes[URES_INDEX_KEYS_TOP]= fKeysTop>>2;
1010 indexes[URES_INDEX_RESOURCES_TOP]= (int32_t)(top>>2);
1011 indexes[URES_INDEX_BUNDLE_TOP]= indexes[URES_INDEX_RESOURCES_TOP];
1012 indexes[URES_INDEX_MAX_TABLE_LENGTH]= fMaxTableLength;
1013
1014 /*
1015 * formatVersion 1.2 (ICU 3.6):
1016 * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
1017 * the memset() above initialized all indexes[] to 0
1018 */
1019 if (fNoFallback) {
1020 indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
1021 }
1022 /*
1023 * formatVersion 2.0 (ICU 4.4):
1024 * more compact string value storage, optional pool bundle
1025 */
1026 if (URES_INDEX_16BIT_TOP < fIndexLength) {
1027 indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1);
1028 }
1029 if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
1030 if (fIsPoolBundle) {
1031 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
1032 uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
1033 (uint32_t)(fKeysTop - fKeysBottom), 0);
1034 if (f16BitUnits.length() <= 1) {
1035 // no pool strings to checksum
1036 } else if (U_IS_BIG_ENDIAN) {
f3c0d7a5 1037 checksum = computeCRC(reinterpret_cast<const char *>(f16BitUnits.getBuffer()),
2ca993e8
A
1038 (uint32_t)f16BitUnits.length() * 2, checksum);
1039 } else {
1040 // Swap to big-endian so we get the same checksum on all platforms
1041 // (except for charset family, due to the key strings).
1042 UnicodeString s(f16BitUnits);
2ca993e8 1043 assert(!s.isBogus());
3d1f044b
A
1044 // .getBuffer(capacity) returns a mutable buffer
1045 char16_t* p = s.getBuffer(f16BitUnits.length());
2ca993e8
A
1046 for (int32_t count = f16BitUnits.length(); count > 0; --count) {
1047 uint16_t x = *p;
1048 *p++ = (uint16_t)((x << 8) | (x >> 8));
1049 }
3d1f044b
A
1050 s.releaseBuffer(f16BitUnits.length());
1051 checksum = computeCRC((const char *)s.getBuffer(),
2ca993e8
A
1052 (uint32_t)f16BitUnits.length() * 2, checksum);
1053 }
1054 indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
1055 } else if (gUsePoolBundle) {
1056 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
1057 indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
1058 }
1059 }
1060 // formatVersion 3 (ICU 56):
1061 // share string values via pool bundle strings
1062 indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8; // bits 23..0 -> 31..8
1063 indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000; // bits 27..24 -> 15..12
1064 indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
1065
1066 /* write the indexes[] */
1067 udata_writeBlock(mem, indexes, fIndexLength*4);
1068
1069 /* write the table key strings */
1070 udata_writeBlock(mem, fKeys+fKeysBottom,
1071 fKeysTop-fKeysBottom);
1072
1073 /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
1074 udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2);
1075
1076 /* write all of the bundle contents: the root item and its children */
1077 byteOffset = fKeysTop + f16BitUnits.length() * 2;
1078 fRoot->write(mem, &byteOffset);
1079 assert(byteOffset == top);
1080
1081 size = udata_finish(mem, &errorCode);
1082 if(top != size) {
1083 fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n",
1084 (int)size, (int)top);
1085 errorCode = U_INTERNAL_PROGRAM_ERROR;
1086 }
1087}
1088
1089/* Opening Functions */
1090
1091TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1092 LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status);
1093 return U_SUCCESS(*status) ? res.orphan() : NULL;
1094}
1095
1096ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1097 LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status);
1098 return U_SUCCESS(*status) ? res.orphan() : NULL;
1099}
1100
1101struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1102 LocalPointer<SResource> res(
1103 new StringResource(bundle, tag, value, len, comment, *status), *status);
1104 return U_SUCCESS(*status) ? res.orphan() : NULL;
1105}
1106
1107struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
1108 LocalPointer<SResource> res(
1109 new AliasResource(bundle, tag, value, len, comment, *status), *status);
1110 return U_SUCCESS(*status) ? res.orphan() : NULL;
1111}
1112
1113IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
1114 LocalPointer<IntVectorResource> res(
1115 new IntVectorResource(bundle, tag, comment, *status), *status);
1116 return U_SUCCESS(*status) ? res.orphan() : NULL;
1117}
1118
1119struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
1120 LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status);
1121 return U_SUCCESS(*status) ? res.orphan() : NULL;
1122}
1123
1124struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
1125 LocalPointer<SResource> res(
1126 new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status);
1127 return U_SUCCESS(*status) ? res.orphan() : NULL;
1128}
1129
1130SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode)
1131 : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
1132 fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
1133 fKeys(NULL), fKeyMap(NULL),
3d1f044b
A
1134 fKeysBottom(0), fKeysTop(0), fKeysCapacity(0),
1135 fKeysCount(0), fLocalKeyLimit(0),
2ca993e8
A
1136 f16BitUnits(), f16BitStringsLength(0),
1137 fUsePoolBundle(&kNoPoolBundle),
1138 fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
1139 fWritePoolBundle(NULL) {
1140 if (U_FAILURE(errorCode)) {
1141 return;
1142 }
1143
1144 if (gFormatVersion > 1) {
1145 // f16BitUnits must start with a zero for empty resources.
1146 // We might be able to omit it if there are no empty 16-bit resources.
1147 f16BitUnits.append((UChar)0);
1148 }
1149
1150 fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
1151 if (isPoolBundle) {
1152 fRoot = new PseudoListResource(this, errorCode);
1153 } else {
1154 fRoot = new TableResource(this, NULL, comment, errorCode);
1155 }
1156 if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) {
1157 if (U_SUCCESS(errorCode)) {
1158 errorCode = U_MEMORY_ALLOCATION_ERROR;
1159 }
1160 return;
1161 }
1162
1163 fKeysCapacity = KEY_SPACE_SIZE;
1164 /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
1165 if (gUsePoolBundle || isPoolBundle) {
1166 fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
1167 } else if (gFormatVersion >= 2) {
1168 fIndexLength = URES_INDEX_16BIT_TOP + 1;
1169 } else /* formatVersion 1 */ {
1170 fIndexLength = URES_INDEX_ATTRIBUTES + 1;
1171 }
1172 fKeysBottom = (1 /* root */ + fIndexLength) * 4;
1173 uprv_memset(fKeys, 0, fKeysBottom);
1174 fKeysTop = fKeysBottom;
1175
1176 if (gFormatVersion == 1) {
1177 fStringsForm = STRINGS_UTF16_V1;
1178 } else {
1179 fStringsForm = STRINGS_UTF16_V2;
1180 }
1181}
1182
1183/* Closing Functions */
1184
/* Destroys a resource created by one of the *_open() functions; deleting NULL is a no-op. */
void res_close(struct SResource *res) {
    delete res;
}
1188
/*
 * Releases everything the bundle owns: the root resource, the locale string,
 * the key buffer and the key map produced by compactKeys().
 */
SRBRoot::~SRBRoot() {
    delete fRoot;
    uprv_free(fLocale);
    uprv_free(fKeys);
    uprv_free(fKeyMap);
}
1195
1196/* Misc Functions */
1197
1198void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) {
1199 if(U_FAILURE(errorCode)) {
1200 return;
1201 }
1202
1203 uprv_free(fLocale);
1204 fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1));
1205 if(fLocale == NULL) {
1206 errorCode = U_MEMORY_ALLOCATION_ERROR;
1207 return;
1208 }
1209
1210 u_UCharsToChars(locale, fLocale, u_strlen(locale)+1);
1211}
1212
1213const char *
1214SRBRoot::getKeyString(int32_t key) const {
1215 if (key < 0) {
1216 return fUsePoolBundle->fKeys + (key & 0x7fffffff);
1217 } else {
1218 return fKeys + key;
1219 }
1220}
1221
1222const char *
1223SResource::getKeyString(const SRBRoot *bundle) const {
1224 if (fKey == -1) {
1225 return NULL;
1226 }
1227 return bundle->getKeyString(fKey);
1228}
1229
1230const char *
1231SRBRoot::getKeyBytes(int32_t *pLength) const {
1232 *pLength = fKeysTop - fKeysBottom;
1233 return fKeys + fKeysBottom;
1234}
1235
1236int32_t
1237SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
1238 int32_t keypos;
1239
3d1f044b
A
1240 // It is not legal to add new key bytes after compactKeys is run!
1241 U_ASSERT(fKeyMap == nullptr);
1242
2ca993e8
A
1243 if (U_FAILURE(errorCode)) {
1244 return -1;
1245 }
1246 if (length < 0 || (keyBytes == NULL && length != 0)) {
1247 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1248 return -1;
1249 }
1250 if (length == 0) {
1251 return fKeysTop;
1252 }
1253
1254 keypos = fKeysTop;
1255 fKeysTop += length;
1256 if (fKeysTop >= fKeysCapacity) {
1257 /* overflow - resize the keys buffer */
1258 fKeysCapacity += KEY_SPACE_SIZE;
1259 fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity));
1260 if(fKeys == NULL) {
1261 errorCode = U_MEMORY_ALLOCATION_ERROR;
1262 return -1;
1263 }
1264 }
1265
1266 uprv_memcpy(fKeys + keypos, keyBytes, length);
1267
1268 return keypos;
1269}
1270
1271int32_t
1272SRBRoot::addTag(const char *tag, UErrorCode &errorCode) {
1273 int32_t keypos;
1274
1275 if (U_FAILURE(errorCode)) {
1276 return -1;
1277 }
1278
1279 if (tag == NULL) {
1280 /* no error: the root table and array items have no keys */
1281 return -1;
1282 }
1283
1284 keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode);
1285 if (U_SUCCESS(errorCode)) {
1286 ++fKeysCount;
1287 }
1288 return keypos;
1289}
1290
/*
 * Three-way comparison of possibly-negative key offsets:
 * returns -1, 0 or 1 for lPos <, ==, > rPos.
 * Deliberately not lPos - rPos, which could overflow.
 */
static int32_t
compareInt32(int32_t lPos, int32_t rPos) {
    return static_cast<int32_t>(lPos > rPos) - static_cast<int32_t>(lPos < rPos);
}
1305
1306static int32_t U_CALLCONV
1307compareKeySuffixes(const void *context, const void *l, const void *r) {
1308 const struct SRBRoot *bundle=(const struct SRBRoot *)context;
1309 int32_t lPos = ((const KeyMapEntry *)l)->oldpos;
1310 int32_t rPos = ((const KeyMapEntry *)r)->oldpos;
1311 const char *lStart = bundle->getKeyString(lPos);
1312 const char *lLimit = lStart;
1313 const char *rStart = bundle->getKeyString(rPos);
1314 const char *rLimit = rStart;
1315 int32_t diff;
1316 while (*lLimit != 0) { ++lLimit; }
1317 while (*rLimit != 0) { ++rLimit; }
1318 /* compare keys in reverse character order */
1319 while (lStart < lLimit && rStart < rLimit) {
1320 diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit;
1321 if (diff != 0) {
1322 return diff;
1323 }
1324 }
1325 /* sort equal suffixes by descending key length */
1326 diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart);
1327 if (diff != 0) {
1328 return diff;
1329 }
1330 /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
1331 return compareInt32(lPos, rPos);
1332}
1333
1334static int32_t U_CALLCONV
1335compareKeyNewpos(const void * /*context*/, const void *l, const void *r) {
1336 return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos);
1337}
1338
1339static int32_t U_CALLCONV
1340compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
1341 return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
1342}
1343
3d1f044b
A
/* Reports this resource's own key offset (fKey, which may be -1) to `collector`. */
void SResource::collectKeys(std::function<void(int32_t)> collector) const {
    collector(fKey);
}
1347
1348void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const {
1349 collector(fKey);
1350 for (SResource* curr = fFirst; curr != NULL; curr = curr->fNext) {
1351 curr->collectKeys(collector);
1352 }
1353}
1354
2ca993e8
A
/*
 * Compacts the key buffer and builds fKeyMap.
 *
 * Steps, in order:
 *  1. For non-pool bundles, collect the key offsets actually referenced by
 *     the resource tree and overwrite every unreferenced key with the
 *     "deleted" marker byte 1.
 *  2. Build a KeyMapEntry array covering the pool bundle keys (negative
 *     oldpos) followed by this bundle's remaining keys.
 *  3. Sort by suffix so each key is followed by its suffixes, point suffix
 *     keys into the longer key that contains them, and mark their bytes deleted.
 *  4. Sort by newpos and squeeze the deleted bytes out of fKeys in place,
 *     adjusting newpos and fKeysTop.
 *  5. Sort by oldpos (for binary searching) and store the map in fKeyMap.
 *
 * Returns early without doing steps 2-5 if errorCode is a failure, there are
 * no keys, or fKeyMap already exists. On any later failure the temporary map
 * is freed and fKeyMap stays unset.
 */
void
SRBRoot::compactKeys(UErrorCode &errorCode) {
    KeyMapEntry *map;
    char *keys;
    int32_t i;

    // Except for pool bundles, keys might not be used.
    // Do not add unused keys to the final bundle.
    std::set<int32_t> keysInUse;
    if (!fIsPoolBundle) {
        // Negative offsets are pool bundle keys (or -1 for "no key"); skip them.
        fRoot->collectKeys([&keysInUse](int32_t key) {
            if (key >= 0) {
                keysInUse.insert(key);
            }
        });
        fKeysCount = static_cast<int32_t>(keysInUse.size());
    }

    int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
    if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
        return;
    }
    map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
    if (map == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // First the pool bundle keys: walk its NUL-terminated key strings.
    keys = (char *)fUsePoolBundle->fKeys;
    for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) {
        map[i].oldpos =
            (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000;  /* negative oldpos */
        map[i].newpos = 0;
        while (*keys != 0) { ++keys; }  /* skip the key */
        ++keys;  /* skip the NUL */
    }
    // Then this bundle's own keys, starting after the root item and indexes[].
    keys = fKeys + fKeysBottom;
    while (i < keysCount) {
        int32_t keyOffset = static_cast<int32_t>(keys - fKeys);
        if (!fIsPoolBundle && keysInUse.count(keyOffset) == 0) {
            // Mark the unused key as deleted
            while (*keys != 0) { *keys++ = 1; }
            *keys++ = 1;
        } else {
            map[i].oldpos = keyOffset;
            map[i].newpos = 0;
            while (*keys != 0) { ++keys; }  /* skip the key */
            ++keys;  /* skip the NUL */
            i++;
        }
    }
    if (keys != fKeys + fKeysTop) {
        // Throw away any unused keys from the end
        fKeysTop = static_cast<int32_t>(keys - fKeys);
    }
    /* Sort the keys so that each one is immediately followed by all of its suffixes. */
    uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
                   compareKeySuffixes, this, FALSE, &errorCode);
    /*
     * Make suffixes point into earlier, longer strings that contain them
     * and mark the old, now unused suffix bytes as deleted.
     */
    if (U_SUCCESS(errorCode)) {
        keys = fKeys;
        for (i = 0; i < keysCount;) {
            /*
             * This key is not a suffix of the previous one;
             * keep this one and delete the following ones that are
             * suffixes of this one.
             */
            const char *key;
            const char *keyLimit;
            int32_t j = i + 1;
            map[i].newpos = map[i].oldpos;
            if (j < keysCount && map[j].oldpos < 0) {
                /* Key string from the pool bundle, do not delete. */
                i = j;
                continue;
            }
            key = getKeyString(map[i].oldpos);
            for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
            for (; j < keysCount && map[j].oldpos >= 0; ++j) {
                const char *k;
                char *suffix;
                const char *suffixLimit;
                int32_t offset;
                suffix = keys + map[j].oldpos;
                for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
                // Where the candidate suffix would start inside the longer key.
                offset = static_cast<int32_t>((keyLimit - key) - (suffixLimit - suffix));
                if (offset < 0) {
                    break;  /* suffix cannot be longer than the original */
                }
                /* Is it a suffix of the earlier, longer key? */
                for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
                if (suffix == suffixLimit && *k == *suffixLimit) {
                    map[j].newpos = map[i].oldpos + offset;  /* yes, point to the earlier key */
                    // Mark the suffix as deleted
                    while (*suffix != 0) { *suffix++ = 1; }
                    *suffix = 1;
                } else {
                    break;  /* not a suffix, restart from here */
                }
            }
            i = j;
        }
        /*
         * Re-sort by newpos, then modify the key characters array in-place
         * to squeeze out unused bytes, and readjust the newpos offsets.
         */
        uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
                       compareKeyNewpos, NULL, FALSE, &errorCode);
        if (U_SUCCESS(errorCode)) {
            int32_t oldpos, newpos, limit;
            oldpos = newpos = fKeysBottom;
            limit = fKeysTop;
            /* skip key offsets that point into the pool bundle rather than this new bundle */
            for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
            if (i < keysCount) {
                while (oldpos < limit) {
                    if (keys[oldpos] == 1) {
                        ++oldpos;  /* skip unused bytes */
                    } else {
                        /* adjust the new offsets for keys starting here */
                        while (i < keysCount && map[i].newpos == oldpos) {
                            map[i++].newpos = newpos;
                        }
                        /* move the key characters to their new position */
                        keys[newpos++] = keys[oldpos++];
                    }
                }
                U_ASSERT(i == keysCount);
            }
            fKeysTop = newpos;
            /* Re-sort once more, by old offsets for binary searching. */
            uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
                           compareKeyOldpos, NULL, FALSE, &errorCode);
            if (U_SUCCESS(errorCode)) {
                /* key size reduction by limit - newpos */
                fKeyMap = map;
                map = NULL;  // ownership moved to fKeyMap; the free below becomes a no-op
            }
        }
    }
    uprv_free(map);
}
1499
1500static int32_t U_CALLCONV
1501compareStringSuffixes(const void * /*context*/, const void *l, const void *r) {
1502 const StringResource *left = *((const StringResource **)l);
1503 const StringResource *right = *((const StringResource **)r);
1504 const UChar *lStart = left->getBuffer();
1505 const UChar *lLimit = lStart + left->length();
1506 const UChar *rStart = right->getBuffer();
1507 const UChar *rLimit = rStart + right->length();
1508 int32_t diff;
1509 /* compare keys in reverse character order */
1510 while (lStart < lLimit && rStart < rLimit) {
1511 diff = (int32_t)*--lLimit - (int32_t)*--rLimit;
1512 if (diff != 0) {
1513 return diff;
1514 }
1515 }
1516 /* sort equal suffixes by descending string length */
1517 return right->length() - left->length();
1518}
1519
1520static int32_t U_CALLCONV
1521compareStringLengths(const void * /*context*/, const void *l, const void *r) {
1522 const StringResource *left = *((const StringResource **)l);
1523 const StringResource *right = *((const StringResource **)r);
1524 int32_t diff;
1525 /* Make "is suffix of another string" compare greater than a non-suffix. */
1526 diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL);
1527 if (diff != 0) {
1528 return diff;
1529 }
1530 /* sort by ascending string length */
1531 diff = left->length() - right->length();
1532 if (diff != 0) {
1533 return diff;
1534 }
1535 // sort by descending size reduction
1536 diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
1537 if (diff != 0) {
1538 return diff;
1539 }
1540 // sort lexically
1541 return left->fString.compare(right->fString);
1542}
1543
1544void
1545StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
1546 int32_t len = length();
1547 fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
1548 fWritten = TRUE;
1549 switch(fNumCharsForLength) {
1550 case 0:
1551 break;
1552 case 1:
1553 dest.append((UChar)(0xdc00 + len));
1554 break;
1555 case 2:
1556 dest.append((UChar)(0xdfef + (len >> 16)));
1557 dest.append((UChar)len);
1558 break;
1559 case 3:
1560 dest.append((UChar)0xdfff);
1561 dest.append((UChar)(len >> 16));
1562 dest.append((UChar)len);
1563 break;
1564 default:
1565 break; /* will not occur */
1566 }
1567 dest.append(fString);
1568 dest.append((UChar)0);
1569}
1570
/*
 * Writes the unique strings of `stringSet` (whose keys are StringResource
 * pointers) into f16BitUnits in the v2 format, sharing storage between a
 * string and its suffixes.
 *
 * Steps, in order:
 *  1. Copy the set's StringResource pointers into an array and sort it so
 *     each string is followed by its suffixes.
 *  2. For each string, link its implicit-length suffixes to it via
 *     fSame/fSuffixOffset and accumulate fNumUnitsSaved. Suffixes of
 *     already-written (pool) strings get their resource word immediately.
 *  3. Re-sort: non-suffixes first, by ascending length.
 *  4. Pool bundle: write only strings that save enough units; all others
 *     get an empty-string resource word.
 *     Other bundles: write the non-suffix strings, optionally offer them to
 *     fWritePoolBundle, then compute the resource words of the suffix strings.
 *
 * Sets errorCode to U_MEMORY_ALLOCATION_ERROR if an allocation fails or
 * f16BitUnits becomes bogus.
 */
void
SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return;
    }
    // Store the StringResource pointers in an array for
    // easy sorting and processing.
    // We enumerate a set of strings, so there are no duplicates.
    int32_t count = uhash_count(stringSet);
    LocalArray<StringResource *> array(new StringResource *[count], errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
        array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
    }
    /* Sort the strings so that each one is immediately followed by all of its suffixes. */
    uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
                   compareStringSuffixes, NULL, FALSE, &errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    /*
     * Make suffixes point into earlier, longer strings that contain them.
     * Temporarily use fSame and fSuffixOffset for suffix strings to
     * refer to the remaining ones.
     */
    for (int32_t i = 0; i < count;) {
        /*
         * This string is not a suffix of the previous one;
         * write this one and subsume the following ones that are
         * suffixes of this one.
         */
        StringResource *res = array[i];
        res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
        // Whole duplicates of pool strings are already accounted for in fPoolStringIndexLimit,
        // see StringResource::handlePreflightStrings().
        int32_t j;
        for (j = i + 1; j < count; ++j) {
            StringResource *suffixRes = array[j];
            /* Is it a suffix of the earlier, longer string? */
            if (res->fString.endsWith(suffixRes->fString)) {
                assert(res->length() != suffixRes->length());  // Set strings are unique.
                if (suffixRes->fWritten) {
                    // Pool string, skip.
                } else if (suffixRes->fNumCharsForLength == 0) {
                    /* yes, point to the earlier string */
                    suffixRes->fSame = res;
                    suffixRes->fSuffixOffset = res->length() - suffixRes->length();
                    if (res->fWritten) {
                        // Suffix-share res which is a pool string.
                        // Compute the resource word and collect the maximum.
                        suffixRes->fRes =
                                res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
                        int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
                        if (poolStringIndex >= fPoolStringIndexLimit) {
                            fPoolStringIndexLimit = poolStringIndex + 1;
                        }
                        suffixRes->fWritten = TRUE;
                    }
                    res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
                } else {
                    /* write the suffix by itself if we need explicit length */
                }
            } else {
                break;  /* not a suffix, restart from here */
            }
        }
        i = j;
    }
    /*
     * Re-sort the strings by ascending length (except suffixes last)
     * to optimize for URES_TABLE16 and URES_ARRAY16:
     * Keep as many as possible within reach of 16-bit offsets.
     */
    uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
                   compareStringLengths, NULL, FALSE, &errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    if (fIsPoolBundle) {
        // Write strings that are sufficiently shared.
        // Avoid writing other strings.
        int32_t numStringsWritten = 0;
        int32_t numUnitsSaved = 0;
        int32_t numUnitsNotSaved = 0;
        for (int32_t i = 0; i < count; ++i) {
            StringResource *res = array[i];
            // Maximum pool string index when suffix-sharing the last character.
            int32_t maxStringIndex =
                    f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
            if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
                    maxStringIndex < RES_MAX_OFFSET) {
                res->writeUTF16v2(0, f16BitUnits);
                ++numStringsWritten;
                numUnitsSaved += res->fNumUnitsSaved;
            } else {
                // Not worth pooling (or out of offset range): emit an empty-string word.
                numUnitsNotSaved += res->fNumUnitsSaved;
                res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
                res->fWritten = TRUE;
            }
        }
        if (f16BitUnits.isBogus()) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        }
        if (getShowWarning()) {  // not quiet
            printf("number of shared strings: %d\n", (int)numStringsWritten);
            printf("16-bit units for strings: %6d = %6d bytes\n",
                   (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
            printf("16-bit units saved: %6d = %6d bytes\n",
                   (int)numUnitsSaved, (int)numUnitsSaved * 2);
            printf("16-bit units not saved: %6d = %6d bytes\n",
                   (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
        }
    } else {
        assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
        /* Write the non-suffix strings. */
        int32_t i;
        for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
            StringResource *res = array[i];
            if (!res->fWritten) {
                int32_t localStringIndex = f16BitUnits.length();
                if (localStringIndex >= fLocalStringIndexLimit) {
                    fLocalStringIndexLimit = localStringIndex + 1;
                }
                // Local string offsets start after the pool string index range.
                res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
            }
        }
        if (f16BitUnits.isBogus()) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
            // Offer every non-suffix string to the pool bundle being built.
            PseudoListResource *poolStrings =
                    static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
            for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
                assert(!array[i]->fString.isEmpty());
                StringResource *poolString =
                        new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
                if (poolString == NULL) {
                    errorCode = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }
                poolStrings->add(poolString);
            }
        }
        /* Write the suffix strings. Make each point to the real string. */
        for (; i < count; ++i) {
            StringResource *res = array[i];
            if (res->fWritten) {
                continue;
            }
            StringResource *same = res->fSame;
            assert(res->length() != same->length());  // Set strings are unique.
            res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
            int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
            // Suffixes of pool strings have been set already.
            assert(localStringIndex >= 0);
            if (localStringIndex >= fLocalStringIndexLimit) {
                fLocalStringIndexLimit = localStringIndex + 1;
            }
            res->fWritten = TRUE;
        }
    }
    // +1 to account for the initial zero in f16BitUnits
    assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
}
3d1f044b
A
1738
/*
 * Default filter hook: does nothing and ignores all of its arguments.
 * TableResource overrides this to remove or recurse into children.
 */
void SResource::applyFilter(
        const PathFilter& /*filter*/,
        ResKeyPath& /*path*/,
        const SRBRoot* /*bundle*/) {
    // Only a few resource types (tables) are capable of being filtered.
}
1745
1746void TableResource::applyFilter(
1747 const PathFilter& filter,
1748 ResKeyPath& path,
1749 const SRBRoot* bundle) {
1750 SResource* prev = nullptr;
1751 SResource* curr = fFirst;
1752 for (; curr != nullptr;) {
1753 path.push(curr->getKeyString(bundle));
1754 auto inclusion = filter.match(path);
1755 if (inclusion == PathFilter::EInclusion::INCLUDE) {
1756 // Include whole subtree
1757 // no-op
1758 if (isVerbose()) {
1759 std::cout << "genrb subtree: " << bundle->fLocale << ": INCLUDE: " << path << std::endl;
1760 }
1761 } else if (inclusion == PathFilter::EInclusion::EXCLUDE) {
1762 // Reject the whole subtree
1763 // Remove it from the linked list
1764 if (isVerbose()) {
1765 std::cout << "genrb subtree: " << bundle->fLocale << ": DELETE: " << path << std::endl;
1766 }
1767 if (prev == nullptr) {
1768 fFirst = curr->fNext;
1769 } else {
1770 prev->fNext = curr->fNext;
1771 }
1772 fCount--;
1773 delete curr;
1774 curr = prev;
1775 } else {
1776 U_ASSERT(inclusion == PathFilter::EInclusion::PARTIAL);
1777 // Recurse into the child
1778 curr->applyFilter(filter, path, bundle);
1779 }
1780 path.pop();
1781
1782 prev = curr;
1783 if (curr == nullptr) {
1784 curr = fFirst;
1785 } else {
1786 curr = curr->fNext;
1787 }
1788 }
1789}