2 **********************************************************************
3 * Copyright (C) 2002, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
13 #include "unicode/utypes.h"
14 #include "unicode/putil.h"
24 // TODO: Clean up and comment this code.
26 //----------------------------------------------------------------------
29 // This is the raw data to be output. We define the data structure,
30 // then include a machine-generated header that contains the actual
33 #include "unicode/uchar.h"
34 #include "unicode/uscript.h"
41 AliasName(const char* str
, int32_t index
);
43 int compare(const AliasName
& other
) const;
45 UBool
operator==(const AliasName
& other
) const {
46 return compare(other
) == 0;
49 UBool
operator!=(const AliasName
& other
) const {
50 return compare(other
) != 0;
54 AliasName::AliasName(const char* _str
,
61 int AliasName::compare(const AliasName
& other
) const {
62 return uprv_comparePropertyNames(str
, other
.str
);
68 int32_t nameGroupIndex
;
70 Alias(int32_t enumValue
,
71 int32_t nameGroupIndex
);
73 int32_t getUniqueNames(int32_t* nameGroupIndices
) const;
76 Alias::Alias(int32_t anEnumValue
,
77 int32_t aNameGroupIndex
) :
78 enumValue(anEnumValue
),
79 nameGroupIndex(aNameGroupIndex
)
83 class Property
: public Alias
{
86 const Alias
* valueList
;
88 Property(int32_t enumValue
,
89 int32_t nameGroupIndex
,
91 const Alias
* valueList
);
94 Property::Property(int32_t _enumValue
,
95 int32_t _nameGroupIndex
,
97 const Alias
* _valueList
) :
98 Alias(_enumValue
, _nameGroupIndex
),
99 valueCount(_valueCount
),
100 valueList(_valueList
)
104 // *** Include the data header ***
107 /* return a list of unique names, not including "", for this property
108 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
109 * elements, will be filled with indices into STRING_TABLE
110 * @return number of indices, >= 1
112 int32_t Alias::getUniqueNames(int32_t* stringIndices
) const {
114 int32_t i
= nameGroupIndex
;
117 int32_t j
= NAME_GROUP
[i
++];
122 if (j
== 0) continue; // omit "" entries
124 for (int32_t k
=0; k
<count
; ++k
) {
125 if (stringIndices
[k
] == j
) {
129 // also do a string check for things like "age|Age"
130 if (STRING_TABLE
[stringIndices
[k
]] == STRING_TABLE
[j
]) {
131 //printf("Found dupe %s|%s\n",
132 // STRING_TABLE[stringIndices[k]].str,
133 // STRING_TABLE[j].str);
138 if (dupe
) continue; // omit duplicates
139 stringIndices
[count
++] = j
;
145 //----------------------------------------------------------------------
147 #define MALLOC(type, count) \
148 (type*) uprv_malloc(sizeof(type) * count)
150 void die(const char* msg
) {
151 fprintf(stderr
, "Error: %s\n", msg
);
155 //----------------------------------------------------------------------
158 * A list of Alias objects.
162 virtual const Alias
& operator[](int32_t i
) const = 0;
163 virtual int32_t count() const = 0;
169 class AliasArrayList
: public AliasList
{
173 AliasArrayList(const Alias
* _a
, int32_t _n
) {
177 virtual const Alias
& operator[](int32_t i
) const {
180 virtual int32_t count() const {
188 class PropertyArrayList
: public AliasList
{
192 PropertyArrayList(const Property
* _a
, int32_t _n
) {
196 virtual const Alias
& operator[](int32_t i
) const {
199 virtual int32_t count() const {
204 //----------------------------------------------------------------------
207 * An element in a name index. It maps a name (given by index) into
210 class NameToEnumEntry
{
214 NameToEnumEntry(int32_t a
, int32_t b
) { nameIndex
=a
; enumValue
=b
; }
217 // Sort function for NameToEnumEntry (sort by name index)
218 U_CAPI
int compareNameToEnumEntry(const void* e1
, const void* e2
) {
219 return ((NameToEnumEntry
*)e1
)->nameIndex
- ((NameToEnumEntry
*)e2
)->nameIndex
;
222 //----------------------------------------------------------------------
225 * An element in an enum index. It maps an enum into a name group entry
228 class EnumToNameGroupEntry
{
231 int32_t nameGroupIndex
;
232 EnumToNameGroupEntry(int32_t a
, int32_t b
) { enumValue
=a
; nameGroupIndex
=b
; }
234 // are enumValues contiguous for count entries starting with this one?
235 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
236 UBool
isContiguous(int32_t count
) const {
237 const EnumToNameGroupEntry
* p
= this;
238 for (int32_t i
=1; i
<count
; ++i
) {
239 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
247 // Sort function for EnumToNameGroupEntry (sort by enum value)
248 U_CAPI
int compareEnumToNameGroupEntry(const void* e1
, const void* e2
) {
249 return ((EnumToNameGroupEntry
*)e1
)->enumValue
- ((EnumToNameGroupEntry
*)e2
)->enumValue
;
252 //----------------------------------------------------------------------
255 * An element in the map from enumerated property enums to value maps.
257 class EnumToValueEntry
{
260 EnumToNameGroupEntry
* enumToName
;
261 int32_t enumToName_count
;
262 NameToEnumEntry
* nameToEnum
;
263 int32_t nameToEnum_count
;
265 // are enumValues contiguous for count entries starting with this one?
266 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
267 UBool
isContiguous(int32_t count
) const {
268 const EnumToValueEntry
* p
= this;
269 for (int32_t i
=1; i
<count
; ++i
) {
270 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
278 // Sort function for EnumToValueEntry (sort by enum)
279 U_CAPI
int compareEnumToValueEntry(const void* e1
, const void* e2
) {
280 return ((EnumToValueEntry
*)e1
)->enumValue
- ((EnumToValueEntry
*)e2
)->enumValue
;
283 //----------------------------------------------------------------------
286 #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
290 PropertyAliases header
;
293 NonContiguousEnumToOffset
* enumToName
;
294 int32_t enumToName_size
;
295 Offset enumToName_offset
;
300 NameToEnum
* nameToEnum
;
301 int32_t nameToEnum_size
;
302 Offset nameToEnum_offset
;
305 NonContiguousEnumToOffset
* enumToValue
;
306 int32_t enumToValue_size
;
307 Offset enumToValue_offset
;
311 int32_t valueMap_size
;
312 int32_t valueMap_count
;
313 Offset valueMap_offset
;
315 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
316 // NULL and one is not. valueEnumToName_size[i] is the size of
317 // the non-NULL one. i=0..valueMapCount-1
319 EnumToOffset
** valueEnumToName
;
321 NonContiguousEnumToOffset
** valueNCEnumToName
;
322 int32_t* valueEnumToName_size
;
323 Offset
* valueEnumToName_offset
;
325 // arrays of valueMap_count pointers, sizes, & offsets
326 NameToEnum
** valueNameToEnum
;
327 int32_t* valueNameToEnum_size
;
328 Offset
* valueNameToEnum_offset
;
331 Offset
* nameGroupPool
;
332 int32_t nameGroupPool_count
;
333 int32_t nameGroupPool_size
;
334 Offset nameGroupPool_offset
;
338 int32_t stringPool_count
;
339 int32_t stringPool_size
;
340 Offset stringPool_offset
;
341 Offset
* stringPool_offsetArray
; // relative to stringPool
343 int32_t total_size
; // size of everything
349 Builder(int32_t debugLevel
);
352 void buildTopLevelProperties(const NameToEnumEntry
* propName
,
353 int32_t propNameCount
,
354 const EnumToNameGroupEntry
* propEnum
,
355 int32_t propEnumCount
);
357 void buildValues(const EnumToValueEntry
* e2v
,
360 void buildStringPool(const AliasName
* propertyNames
,
361 int32_t propertyNameCount
,
362 const int32_t* nameGroupIndices
,
363 int32_t nameGroupIndicesCount
);
367 int8_t* createData(int32_t& length
) const;
371 static EnumToOffset
* buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
374 static NonContiguousEnumToOffset
*
375 buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
379 static NonContiguousEnumToOffset
*
380 buildNCEnumToValue(const EnumToValueEntry
* e2v
,
384 static NameToEnum
* buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
388 Offset
stringIndexToOffset(int32_t index
, UBool allowNeg
=FALSE
) const;
389 void fixupNameToEnum(NameToEnum
* n
);
390 void fixupEnumToNameGroup(EnumToOffset
* e2ng
);
391 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
);
393 void computeOffsets();
394 void fixupStringPoolOffsets();
395 void fixupNameGroupPoolOffsets();
396 void fixupMiscellaneousOffsets();
398 static int32_t align(int32_t a
);
399 static void erase(void* p
, int32_t size
);
402 Builder::Builder(int32_t debugLevel
) {
410 valueNCEnumToName
= 0;
411 valueEnumToName_size
= 0;
412 valueEnumToName_offset
= 0;
414 valueNameToEnum_size
= 0;
415 valueNameToEnum_offset
= 0;
418 stringPool_offsetArray
= 0;
421 Builder::~Builder() {
422 uprv_free(enumToName
);
423 uprv_free(nameToEnum
);
424 uprv_free(enumToValue
);
426 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
427 uprv_free(valueEnumToName
[i
]);
428 uprv_free(valueNCEnumToName
[i
]);
429 uprv_free(valueNameToEnum
[i
]);
431 uprv_free(valueEnumToName
);
432 uprv_free(valueNCEnumToName
);
433 uprv_free(valueEnumToName_size
);
434 uprv_free(valueEnumToName_offset
);
435 uprv_free(valueNameToEnum
);
436 uprv_free(valueNameToEnum_size
);
437 uprv_free(valueNameToEnum_offset
);
438 uprv_free(nameGroupPool
);
439 uprv_free(stringPool
);
440 uprv_free(stringPool_offsetArray
);
443 int32_t Builder::align(int32_t a
) {
445 int32_t k
= a
% sizeof(int32_t);
449 a
+= sizeof(int32_t) - k
;
453 void Builder::erase(void* p
, int32_t size
) {
455 int8_t* q
= (int8_t*) p
;
461 EnumToOffset
* Builder::buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
464 U_ASSERT(e2ng
->isContiguous(count
));
465 size
= align(EnumToOffset::getSize(count
));
466 EnumToOffset
* result
= (EnumToOffset
*) uprv_malloc(size
);
468 result
->enumStart
= e2ng
->enumValue
;
469 result
->enumLimit
= e2ng
->enumValue
+ count
;
470 Offset
* p
= result
->getOffsetArray();
471 for (int32_t i
=0; i
<count
; ++i
) {
472 // set these to NGI index values
473 // fix them up to NGI offset values
474 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
475 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
480 NonContiguousEnumToOffset
*
481 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
484 U_ASSERT(!e2ng
->isContiguous(count
));
485 size
= align(NonContiguousEnumToOffset::getSize(count
));
486 NonContiguousEnumToOffset
* nc
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
489 EnumValue
* e
= nc
->getEnumArray();
490 Offset
* p
= nc
->getOffsetArray();
491 for (int32_t i
=0; i
<count
; ++i
) {
492 // set these to NGI index values
493 // fix them up to NGI offset values
494 e
[i
] = e2ng
[i
].enumValue
;
495 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
496 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
501 NonContiguousEnumToOffset
*
502 Builder::buildNCEnumToValue(const EnumToValueEntry
* e2v
,
505 U_ASSERT(!e2v
->isContiguous(count
));
506 size
= align(NonContiguousEnumToOffset::getSize(count
));
507 NonContiguousEnumToOffset
* result
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
509 result
->count
= count
;
510 EnumValue
* e
= result
->getEnumArray();
511 for (int32_t i
=0; i
<count
; ++i
) {
512 e
[i
] = e2v
[i
].enumValue
;
513 // offset must be set later
519 * Given an index into the string pool, return an offset. computeOffsets()
520 * must have been called already. If allowNegative is true, allow negatives
521 * and preserve their sign.
523 Offset
Builder::stringIndexToOffset(int32_t index
, UBool allowNegative
) const {
524 // Index 0 is ""; we turn this into an Offset of zero
525 if (index
== 0) return 0;
528 return -Builder::stringIndexToOffset(-index
);
530 die("Negative string pool index");
533 if (index
>= stringPool_count
) {
534 die("String pool index too large");
536 Offset result
= stringPool_offset
+ stringPool_offsetArray
[index
];
537 U_ASSERT(result
>= 0 && result
< total_size
);
540 return 0; // never executed; make compiler happy
543 NameToEnum
* Builder::buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
546 size
= align(NameToEnum::getSize(count
));
547 NameToEnum
* n2e
= (NameToEnum
*) uprv_malloc(size
);
550 Offset
* p
= n2e
->getNameArray();
551 EnumValue
* e
= n2e
->getEnumArray();
552 for (int32_t i
=0; i
<count
; ++i
) {
553 // set these to SP index values
554 // fix them up to SP offset values
555 U_ASSERT(IS_VALID_OFFSET(nameToEnum
[i
].nameIndex
));
556 p
[i
] = (Offset
) nameToEnum
[i
].nameIndex
; // FIXUP later
557 e
[i
] = nameToEnum
[i
].enumValue
;
563 void Builder::buildTopLevelProperties(const NameToEnumEntry
* propName
,
564 int32_t propNameCount
,
565 const EnumToNameGroupEntry
* propEnum
,
566 int32_t propEnumCount
) {
567 enumToName
= buildNCEnumToNameGroup(propEnum
,
570 nameToEnum
= buildNameToEnum(propName
,
575 void Builder::buildValues(const EnumToValueEntry
* e2v
,
579 U_ASSERT(!e2v
->isContiguous(count
));
581 valueMap_count
= count
;
583 enumToValue
= buildNCEnumToValue(e2v
, count
,
586 valueMap_size
= align(count
* sizeof(ValueMap
));
587 valueMap
= (ValueMap
*) uprv_malloc(valueMap_size
);
588 erase(valueMap
, valueMap_size
);
590 valueEnumToName
= MALLOC(EnumToOffset
*, count
);
591 valueNCEnumToName
= MALLOC(NonContiguousEnumToOffset
*, count
);
592 valueEnumToName_size
= MALLOC(int32_t, count
);
593 valueEnumToName_offset
= MALLOC(Offset
, count
);
594 valueNameToEnum
= MALLOC(NameToEnum
*, count
);
595 valueNameToEnum_size
= MALLOC(int32_t, count
);
596 valueNameToEnum_offset
= MALLOC(Offset
, count
);
598 for (i
=0; i
<count
; ++i
) {
600 e2v
[i
].enumToName
->isContiguous(e2v
[i
].enumToName_count
);
601 valueEnumToName
[i
] = 0;
602 valueNCEnumToName
[i
] = 0;
604 valueEnumToName
[i
] = buildEnumToOffset(e2v
[i
].enumToName
,
605 e2v
[i
].enumToName_count
,
606 valueEnumToName_size
[i
]);
608 valueNCEnumToName
[i
] = buildNCEnumToNameGroup(e2v
[i
].enumToName
,
609 e2v
[i
].enumToName_count
,
610 valueEnumToName_size
[i
]);
613 buildNameToEnum(e2v
[i
].nameToEnum
,
614 e2v
[i
].nameToEnum_count
,
615 valueNameToEnum_size
[i
]);
619 void Builder::buildStringPool(const AliasName
* propertyNames
,
620 int32_t propertyNameCount
,
621 const int32_t* nameGroupIndices
,
622 int32_t nameGroupIndicesCount
) {
625 nameGroupPool_count
= nameGroupIndicesCount
;
626 nameGroupPool_size
= sizeof(Offset
) * nameGroupPool_count
;
627 nameGroupPool
= MALLOC(Offset
, nameGroupPool_count
);
629 for (i
=0; i
<nameGroupPool_count
; ++i
) {
630 // Some indices are negative.
631 int32_t a
= nameGroupIndices
[i
];
633 U_ASSERT(IS_VALID_OFFSET(a
));
634 nameGroupPool
[i
] = (Offset
) nameGroupIndices
[i
];
637 stringPool_count
= propertyNameCount
;
639 // first string must be "" -- we skip it
640 U_ASSERT(*propertyNames
[0].str
== 0);
641 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
642 stringPool_size
+= uprv_strlen(propertyNames
[i
].str
) + 1;
644 stringPool
= MALLOC(char, stringPool_size
);
645 stringPool_offsetArray
= MALLOC(Offset
, stringPool_count
);
647 char* p
= stringPool
;
648 stringPool_offsetArray
[0] = -1; // we don't use this entry
649 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
650 const char* str
= propertyNames
[i
].str
;
651 int32_t len
= uprv_strlen(str
);
655 stringPool_offsetArray
[i
] = soFar
;
656 soFar
+= (Offset
)(len
+1);
658 U_ASSERT(soFar
== stringPool_size
);
659 U_ASSERT(p
== (stringPool
+ stringPool_size
));
662 // Confirm that PropertyAliases is a POD (plain old data; see C++
663 // std). The following union will _fail to compile_ if
664 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
665 // macro to check this, but that's not quite right, so that test is
666 // commented out -- see below.)
670 } PropertyAliasesPODTest
;
672 void Builder::computeOffsets() {
674 Offset off
= sizeof(header
);
677 printf("header \t offset=%4d size=%5d\n", 0, off
);
680 // PropertyAliases must have no v-table and must be
681 // padded (if necessary) to the next 32-bit boundary.
682 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
683 U_ASSERT(sizeof(header
) % sizeof(int32_t) == 0);
685 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
687 #define COMPUTE_OFFSET2(foo,type) \
688 if (debug>0) printf(#foo "\t offset=%4d size=%5d\n", off, foo##_size); \
689 foo##_offset = off; \
690 U_ASSERT(IS_VALID_OFFSET(off + foo##_size)); \
691 U_ASSERT(foo##_offset % sizeof(type) == 0); \
692 off = (Offset) (off + foo##_size);
694 COMPUTE_OFFSET(enumToName
); // 0:
695 COMPUTE_OFFSET(nameToEnum
); // 2:
696 COMPUTE_OFFSET(enumToValue
); // 3:
697 COMPUTE_OFFSET(valueMap
); // 4:
699 for (i
=0; i
<valueMap_count
; ++i
) {
701 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
702 i
, off
, valueEnumToName_size
[i
]);
705 valueEnumToName_offset
[i
] = off
; // 5:
706 U_ASSERT(IS_VALID_OFFSET(off
+ valueEnumToName_size
[i
]));
707 off
= (Offset
) (off
+ valueEnumToName_size
[i
]);
710 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
711 i
, off
, valueNameToEnum_size
[i
]);
714 valueNameToEnum_offset
[i
] = off
; // 6:
715 U_ASSERT(IS_VALID_OFFSET(off
+ valueNameToEnum_size
[i
]));
716 off
= (Offset
) (off
+ valueNameToEnum_size
[i
]);
719 // These last two chunks have weaker alignment needs
720 COMPUTE_OFFSET2(nameGroupPool
,Offset
); // 98:
721 COMPUTE_OFFSET2(stringPool
,char); // 99:
724 if (debug
>0) printf("total size=%5d\n\n", total_size
);
725 U_ASSERT(total_size
<= (MAX_OFFSET
+1));
728 void Builder::fixupNameToEnum(NameToEnum
* n
) {
729 // Fix the string pool offsets in n
730 Offset
* p
= n
->getNameArray();
731 for (int32_t i
=0; i
<n
->count
; ++i
) {
732 p
[i
] = stringIndexToOffset(p
[i
]);
736 void Builder::fixupStringPoolOffsets() {
740 fixupNameToEnum(nameToEnum
);
743 for (i
=0; i
<valueMap_count
; ++i
) {
744 fixupNameToEnum(valueNameToEnum
[i
]);
748 for (i
=0; i
<nameGroupPool_count
; ++i
) {
749 nameGroupPool
[i
] = stringIndexToOffset(nameGroupPool
[i
], TRUE
);
753 void Builder::fixupEnumToNameGroup(EnumToOffset
* e2ng
) {
756 Offset
* p
= e2ng
->getOffsetArray();
757 for (i
=e2ng
->enumStart
, j
=0; i
<e2ng
->enumLimit
; ++i
, ++j
) {
758 p
[j
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[j
];
762 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
) {
764 EnumValue
* e
= e2ng
->getEnumArray();
765 Offset
* p
= e2ng
->getOffsetArray();
766 for (i
=0; i
<e2ng
->count
; ++i
) {
767 p
[i
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[i
];
771 void Builder::fixupNameGroupPoolOffsets() {
775 fixupNCEnumToNameGroup(enumToName
);
780 for (i
=0; i
<valueMap_count
; ++i
) {
782 if (valueEnumToName
[i
] != 0) {
783 fixupEnumToNameGroup(valueEnumToName
[i
]);
786 if (valueNCEnumToName
[i
] != 0) {
787 fixupNCEnumToNameGroup(valueNCEnumToName
[i
]);
792 void Builder::fixupMiscellaneousOffsets() {
796 erase(&header
, sizeof(header
));
797 header
.enumToName_offset
= enumToName_offset
;
798 header
.nameToEnum_offset
= nameToEnum_offset
;
799 header
.enumToValue_offset
= enumToValue_offset
;
800 // header meta-info used by Java:
801 U_ASSERT(total_size
> 0 && total_size
< 0x7FFF);
802 header
.total_size
= (int16_t) total_size
;
803 header
.valueMap_offset
= valueMap_offset
;
804 header
.valueMap_count
= (int16_t) valueMap_count
;
805 header
.nameGroupPool_offset
= nameGroupPool_offset
;
806 header
.nameGroupPool_count
= (int16_t) nameGroupPool_count
;
807 header
.stringPool_offset
= stringPool_offset
;
808 header
.stringPool_count
= (int16_t) stringPool_count
- 1; // don't include "" entry
810 U_ASSERT(valueMap_count
<= 0x7FFF);
811 U_ASSERT(nameGroupPool_count
<= 0x7FFF);
812 U_ASSERT(stringPool_count
<= 0x7FFF);
815 Offset
* p
= enumToValue
->getOffsetArray();
816 EnumValue
* e
= enumToValue
->getEnumArray();
817 U_ASSERT(valueMap_count
== enumToValue
->count
);
818 for (i
=0; i
<valueMap_count
; ++i
) {
819 p
[i
] = (Offset
)(valueMap_offset
+ sizeof(ValueMap
) * i
);
823 for (i
=0; i
<valueMap_count
; ++i
) {
824 ValueMap
& v
= valueMap
[i
];
825 v
.enumToName_offset
= v
.ncEnumToName_offset
= 0;
826 if (valueEnumToName
[i
] != 0) {
827 v
.enumToName_offset
= valueEnumToName_offset
[i
];
829 if (valueNCEnumToName
[i
] != 0) {
830 v
.ncEnumToName_offset
= valueEnumToName_offset
[i
];
832 v
.nameToEnum_offset
= valueNameToEnum_offset
[i
];
836 void Builder::fixup() {
838 fixupStringPoolOffsets();
839 fixupNameGroupPoolOffsets();
840 fixupMiscellaneousOffsets();
843 int8_t* Builder::createData(int32_t& length
) const {
845 int8_t* result
= MALLOC(int8_t, length
);
848 int8_t* limit
= result
+ length
;
850 #define APPEND2(x, size) \
851 U_ASSERT((p+size)<=limit); \
852 uprv_memcpy(p, x, size); \
855 #define APPEND(x) APPEND2(x, x##_size)
857 APPEND2(&header
, sizeof(header
));
863 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
864 U_ASSERT((valueEnumToName
[i
] != 0 && valueNCEnumToName
[i
] == 0) ||
865 (valueEnumToName
[i
] == 0 && valueNCEnumToName
[i
] != 0));
866 if (valueEnumToName
[i
] != 0) {
867 APPEND2(valueEnumToName
[i
], valueEnumToName_size
[i
]);
869 if (valueNCEnumToName
[i
] != 0) {
870 APPEND2(valueNCEnumToName
[i
], valueEnumToName_size
[i
]);
872 APPEND2(valueNameToEnum
[i
], valueNameToEnum_size
[i
]);
875 APPEND(nameGroupPool
);
878 U_ASSERT(p
== limit
);
883 //----------------------------------------------------------------------
885 /* UDataInfo cf. udata.h */
886 static UDataInfo dataInfo
= {
895 {PNAME_SIG_0
, PNAME_SIG_1
, PNAME_SIG_2
, PNAME_SIG_3
},
896 {PNAME_FORMAT_VERSION
, 0, 0, 0}, /* formatVersion */
897 {VERSION_0
, VERSION_1
, VERSION_2
, VERSION_3
} /* Unicode version */
901 U_CAPI
int compareAliasNames(const void* elem1
, const void* elem2
) {
902 return ((const AliasName
*)elem1
)->compare(*(const AliasName
*)elem2
);
907 // command-line options
913 int MMain(int argc
, char *argv
[]);
916 NameToEnumEntry
* createNameIndex(const AliasList
& list
,
917 int32_t& nameIndexCount
);
919 EnumToNameGroupEntry
* createEnumIndex(const AliasList
& list
);
921 int32_t writeDataFile(const char *destdir
, const Builder
&);
924 int main(int argc
, char *argv
[]) {
926 U_MAIN_INIT_ARGS(argc
, argv
);
927 return app
.MMain(argc
, argv
);
930 static UOption options
[]={
932 UOPTION_HELP_QUESTION_MARK
,
936 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG
),
939 NameToEnumEntry
* genpname::createNameIndex(const AliasList
& list
,
940 int32_t& nameIndexCount
) {
942 // Build name => enum map
944 // This is an n->1 map. There are typically multiple names
945 // mapping to one enum. The name index is sorted in order of the name,
946 // as defined by the uprv_comparePropertyNames() function.
949 int32_t count
= list
.count();
951 // compute upper limit on number of names in the index
952 int32_t nameIndexCapacity
= count
* MAX_NAMES_PER_GROUP
;
953 NameToEnumEntry
* nameIndex
= MALLOC(NameToEnumEntry
, nameIndexCapacity
);
956 int32_t names
[MAX_NAMES_PER_GROUP
];
957 for (i
=0; i
<count
; ++i
) {
958 const Alias
& p
= list
[i
];
959 int32_t n
= p
.getUniqueNames(names
);
960 for (j
=0; j
<n
; ++j
) {
961 U_ASSERT(nameIndexCount
< nameIndexCapacity
);
962 nameIndex
[nameIndexCount
++] =
963 NameToEnumEntry(names
[j
], p
.enumValue
);
966 qsort((void*) nameIndex
, nameIndexCount
, sizeof(nameIndex
[0]),
967 compareNameToEnumEntry
);
969 printf("Alias names: %d\n", nameIndexCount
);
970 for (i
=0; i
<nameIndexCount
; ++i
) {
972 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
,
973 nameIndex
[i
].enumValue
);
977 // make sure there are no duplicates. for a sorted list we need
978 // only compare adjacent items. Alias.getUniqueNames() has
979 // already eliminated duplicate names for a single property, which
980 // does occur, so we're checking for duplicate names between two
981 // properties, which should never occur.
983 for (i
=1; i
<nameIndexCount
; ++i
) {
984 if (STRING_TABLE
[nameIndex
[i
-1].nameIndex
] ==
985 STRING_TABLE
[nameIndex
[i
].nameIndex
]) {
986 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
987 STRING_TABLE
[nameIndex
[i
-1].nameIndex
].str
,
988 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
);
993 die("Two or more duplicate names in property list");
999 EnumToNameGroupEntry
* genpname::createEnumIndex(const AliasList
& list
) {
1001 // Build the enum => name map
1003 // This is a 1->n map. Each enum maps to 1 or more names. To
1004 // accomplish this the index entry points to an element of the
1005 // NAME_GROUP array. This is the short name (which may be empty).
1006 // From there, subsequent elements of NAME_GROUP are alternate
1007 // names for this enum, up to and including the first one that is
1008 // negative (negate for actual index).
1011 int32_t count
= list
.count();
1013 EnumToNameGroupEntry
* enumIndex
= MALLOC(EnumToNameGroupEntry
, count
);
1014 for (i
=0; i
<count
; ++i
) {
1015 const Alias
& p
= list
[i
];
1016 enumIndex
[i
] = EnumToNameGroupEntry(p
.enumValue
, p
.nameGroupIndex
);
1018 qsort((void*) enumIndex
, count
, sizeof(enumIndex
[0]),
1019 compareEnumToNameGroupEntry
);
1021 printf("Property enums: %d\n", count
);
1022 for (i
=0; i
<count
; ++i
) {
1023 printf("%d => %d: ",
1024 enumIndex
[i
].enumValue
,
1025 enumIndex
[i
].nameGroupIndex
);
1027 for (j
=enumIndex
[i
].nameGroupIndex
; !done
; ++j
) {
1033 printf("\"%s\"", STRING_TABLE
[k
].str
);
1034 if (!done
) printf(", ");
1043 int genpname::MMain(int argc
, char* argv
[]) {
1047 /* preset then read command line options */
1048 options
[3].value
=u_getDataDirectory();
1049 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
1051 /* error handling, printing usage message */
1054 "error in command line argument \"%s\"\n",
1058 debug
= options
[5].doesOccur
? (*options
[5].value
- '0') : 0;
1060 if (argc
!=1 || options
[0].doesOccur
|| options
[1].doesOccur
||
1061 debug
< 0 || debug
> 9) {
1063 "usage: %s [-options]\n"
1064 "\tcreate " U_ICUDATA_NAME
"_" PNAME_DATA_NAME
"." PNAME_DATA_TYPE
"\n"
1066 "\t-h or -? or --help this usage text\n"
1067 "\t-v or --verbose turn on verbose output\n"
1068 "\t-c or --copyright include a copyright notice\n"
1069 "\t-d or --destdir destination directory, followed by the path\n"
1070 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1072 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
1075 /* get the options values */
1076 useCopyright
=options
[2].doesOccur
;
1077 verbose
= options
[4].doesOccur
;
1079 // ------------------------------------------------------------
1080 // Sort the string table. This produces the proper sorting
1081 // using the actual comparison function we will use.
1082 qsort((void*) STRING_TABLE
, STRING_COUNT
, sizeof(STRING_TABLE
[0]),
1085 printf("String pool: %d\n", STRING_COUNT
);
1087 for (i
=0; i
<STRING_COUNT
; ++i
) {
1088 REMAP
[STRING_TABLE
[i
].index
] = i
;
1090 if (i
!= 0) printf(", ");
1091 printf("%s (%d)", STRING_TABLE
[i
].str
, STRING_TABLE
[i
].index
);
1098 // ------------------------------------------------------------
1099 // Fixup the NAME_GROUP indices so they match the sorted order
1100 for (i
=0; i
<NAME_GROUP_COUNT
; ++i
) {
1101 // keep negative entries (end markers) negative
1102 if (NAME_GROUP
[i
] < 0) {
1103 NAME_GROUP
[i
] = -REMAP
[-NAME_GROUP
[i
]];
1105 NAME_GROUP
[i
] = REMAP
[NAME_GROUP
[i
]];
1109 // ------------------------------------------------------------
1110 // Create top-level property indices
1112 PropertyArrayList
props(PROPERTY
, PROPERTY_COUNT
);
1113 int32_t propNameCount
;
1114 NameToEnumEntry
* propName
= createNameIndex(props
, propNameCount
);
1115 EnumToNameGroupEntry
* propEnum
= createEnumIndex(props
);
1117 // ------------------------------------------------------------
1118 // Create indices for the value list for each enumerated property
1120 // This will have more entries than we need...
1121 EnumToValueEntry
* enumToValue
= MALLOC(EnumToValueEntry
, PROPERTY_COUNT
);
1122 int32_t enumToValue_count
= 0;
1123 for (i
=0, j
=0; i
<PROPERTY_COUNT
; ++i
) {
1124 if (PROPERTY
[i
].valueCount
== 0) continue;
1125 AliasArrayList
values(PROPERTY
[i
].valueList
,
1126 PROPERTY
[i
].valueCount
);
1127 enumToValue
[j
].enumValue
= PROPERTY
[i
].enumValue
;
1128 enumToValue
[j
].enumToName
= createEnumIndex(values
);
1129 enumToValue
[j
].enumToName_count
= PROPERTY
[i
].valueCount
;
1130 enumToValue
[j
].nameToEnum
= createNameIndex(values
,
1131 enumToValue
[j
].nameToEnum_count
);
1134 enumToValue_count
= j
;
1135 qsort((void*) enumToValue
, enumToValue_count
, sizeof(enumToValue
[0]),
1136 compareEnumToValueEntry
);
1138 // ------------------------------------------------------------
1139 // Build PropertyAliases layout in memory
1141 Builder
builder(debug
);
1143 builder
.buildTopLevelProperties(propName
,
1148 builder
.buildValues(enumToValue
,
1151 builder
.buildStringPool(STRING_TABLE
,
1158 ////////////////////////////////////////////////////////////
1159 // Write the output file
1160 ////////////////////////////////////////////////////////////
1161 int32_t wlen
= writeDataFile(options
[3].value
, builder
);
1163 fprintf(stdout
, "Output file: %s.%s, %ld bytes\n",
1164 U_ICUDATA_NAME
"_" PNAME_DATA_NAME
, PNAME_DATA_TYPE
, (long)wlen
);
1167 return 0; // success
1170 int32_t genpname::writeDataFile(const char *destdir
, const Builder
& builder
) {
1172 int8_t* data
= builder
.createData(length
);
1174 UNewDataMemory
*pdata
;
1175 UErrorCode status
= U_ZERO_ERROR
;
1177 pdata
= udata_create(destdir
, PNAME_DATA_TYPE
, U_ICUDATA_NAME
"_" PNAME_DATA_NAME
, &dataInfo
,
1178 useCopyright
? U_COPYRIGHT_STRING
: 0, &status
);
1179 if (U_FAILURE(status
)) {
1180 die("Unable to create data memory");
1183 udata_writeBlock(pdata
, data
, length
);
1185 int32_t dataLength
= (int32_t) udata_finish(pdata
, &status
);
1186 if (U_FAILURE(status
)) {
1187 die("Error writing output file");
1189 if (dataLength
!= length
) {
1190 die("Written file doesn't match expected size");