2 **********************************************************************
3 * Copyright (C) 2002-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
26 // TODO: Clean up and comment this code.
28 //----------------------------------------------------------------------
31 // This is the raw data to be output. We define the data structure,
32 // then include a machine-generated header that contains the actual
35 #include "unicode/uchar.h"
36 #include "unicode/uscript.h"
37 #include "unicode/unorm.h"
44 AliasName(const char* str
, int32_t index
);
46 int compare(const AliasName
& other
) const;
48 UBool
operator==(const AliasName
& other
) const {
49 return compare(other
) == 0;
52 UBool
operator!=(const AliasName
& other
) const {
53 return compare(other
) != 0;
57 AliasName::AliasName(const char* _str
,
64 int AliasName::compare(const AliasName
& other
) const {
65 return uprv_comparePropertyNames(str
, other
.str
);
71 int32_t nameGroupIndex
;
73 Alias(int32_t enumValue
,
74 int32_t nameGroupIndex
);
76 int32_t getUniqueNames(int32_t* nameGroupIndices
) const;
79 Alias::Alias(int32_t anEnumValue
,
80 int32_t aNameGroupIndex
) :
81 enumValue(anEnumValue
),
82 nameGroupIndex(aNameGroupIndex
)
86 class Property
: public Alias
{
89 const Alias
* valueList
;
91 Property(int32_t enumValue
,
92 int32_t nameGroupIndex
,
94 const Alias
* valueList
);
97 Property::Property(int32_t _enumValue
,
98 int32_t _nameGroupIndex
,
100 const Alias
* _valueList
) :
101 Alias(_enumValue
, _nameGroupIndex
),
102 valueCount(_valueCount
),
103 valueList(_valueList
)
107 // *** Include the data header ***
110 /* return a list of unique names, not including "", for this property
111 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
112 * elements, will be filled with indices into STRING_TABLE
113 * @return number of indices, >= 1
115 int32_t Alias::getUniqueNames(int32_t* stringIndices
) const {
117 int32_t i
= nameGroupIndex
;
120 int32_t j
= NAME_GROUP
[i
++];
125 if (j
== 0) continue; // omit "" entries
127 for (int32_t k
=0; k
<count
; ++k
) {
128 if (stringIndices
[k
] == j
) {
132 // also do a string check for things like "age|Age"
133 if (STRING_TABLE
[stringIndices
[k
]] == STRING_TABLE
[j
]) {
134 //printf("Found dupe %s|%s\n",
135 // STRING_TABLE[stringIndices[k]].str,
136 // STRING_TABLE[j].str);
141 if (dupe
) continue; // omit duplicates
142 stringIndices
[count
++] = j
;
148 //----------------------------------------------------------------------
// Allocate an uninitialized array of `count` objects of `type` through
// uprv_malloc() and yield it as a type*.  Both `count` and the whole
// expansion are parenthesized so that expression arguments such as
// MALLOC(T, a + b) size the buffer correctly and the result composes safely
// with surrounding operators.  The result is not checked for failure here.
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
// Report a fatal error: writes "Error: <msg>" followed by a newline to stderr.
// NOTE(review): callers treat die() as non-returning (see the "never
// executed" return in Builder::stringIndexToOffset); the statement that
// terminates the program is not visible in this excerpt -- confirm.
153 void die(const char* msg
) {
154 fprintf(stderr
, "Error: %s\n", msg
);
158 //----------------------------------------------------------------------
161 * A list of Alias objects.
165 virtual ~AliasList();
166 virtual const Alias
& operator[](int32_t i
) const = 0;
167 virtual int32_t count() const = 0;
170 AliasList::~AliasList() {}
175 class AliasArrayList
: public AliasList
{
179 AliasArrayList(const Alias
* _a
, int32_t _n
) {
183 virtual const Alias
& operator[](int32_t i
) const {
186 virtual int32_t count() const {
194 class PropertyArrayList
: public AliasList
{
198 PropertyArrayList(const Property
* _a
, int32_t _n
) {
202 virtual const Alias
& operator[](int32_t i
) const {
205 virtual int32_t count() const {
210 //----------------------------------------------------------------------
213 * An element in a name index. It maps a name (given by index) into
216 class NameToEnumEntry
{
220 NameToEnumEntry(int32_t a
, int32_t b
) { nameIndex
=a
; enumValue
=b
; }
223 // Sort function for NameToEnumEntry (sort by name)
225 compareNameToEnumEntry(const void * /*context*/, const void* e1
, const void* e2
) {
227 STRING_TABLE
[((NameToEnumEntry
*)e1
)->nameIndex
].
228 compare(STRING_TABLE
[((NameToEnumEntry
*)e2
)->nameIndex
]);
231 //----------------------------------------------------------------------
234 * An element in an enum index. It maps an enum into a name group entry
237 class EnumToNameGroupEntry
{
240 int32_t nameGroupIndex
;
241 EnumToNameGroupEntry(int32_t a
, int32_t b
) { enumValue
=a
; nameGroupIndex
=b
; }
243 // are enumValues contiguous for count entries starting with this one?
244 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
245 UBool
isContiguous(int32_t count
) const {
246 const EnumToNameGroupEntry
* p
= this;
247 for (int32_t i
=1; i
<count
; ++i
) {
248 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
256 // Sort function for EnumToNameGroupEntry (sort by name index)
258 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1
, const void* e2
) {
259 return ((EnumToNameGroupEntry
*)e1
)->enumValue
- ((EnumToNameGroupEntry
*)e2
)->enumValue
;
262 //----------------------------------------------------------------------
265 * An element in the map from enumerated property enums to value maps.
267 class EnumToValueEntry
{
270 EnumToNameGroupEntry
* enumToName
;
271 int32_t enumToName_count
;
272 NameToEnumEntry
* nameToEnum
;
273 int32_t nameToEnum_count
;
275 // are enumValues contiguous for count entries starting with this one?
276 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
277 UBool
isContiguous(int32_t count
) const {
278 const EnumToValueEntry
* p
= this;
279 for (int32_t i
=1; i
<count
; ++i
) {
280 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
288 // Sort function for EnumToValueEntry (sort by enum)
290 compareEnumToValueEntry(const void * /*context*/, const void* e1
, const void* e2
) {
291 return ((EnumToValueEntry
*)e1
)->enumValue
- ((EnumToValueEntry
*)e2
)->enumValue
;
294 //----------------------------------------------------------------------
// Range check used before a value is stored as an Offset: true when x lies
// in [0, MAX_OFFSET].  MAX_OFFSET is defined outside this excerpt.  The
// argument is evaluated twice, so it must not have side effects.
297 #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
301 PropertyAliases header
;
304 NonContiguousEnumToOffset
* enumToName
;
305 int32_t enumToName_size
;
306 Offset enumToName_offset
;
311 NameToEnum
* nameToEnum
;
312 int32_t nameToEnum_size
;
313 Offset nameToEnum_offset
;
316 NonContiguousEnumToOffset
* enumToValue
;
317 int32_t enumToValue_size
;
318 Offset enumToValue_offset
;
322 int32_t valueMap_size
;
323 int32_t valueMap_count
;
324 Offset valueMap_offset
;
326 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
327 // NULL and one is not. valueEnumToName_size[i] is the size of
328 // the non-NULL one. i=0..valueMapCount-1
330 EnumToOffset
** valueEnumToName
;
332 NonContiguousEnumToOffset
** valueNCEnumToName
;
333 int32_t* valueEnumToName_size
;
334 Offset
* valueEnumToName_offset
;
336 // arrays of valueMap_count pointers, sizes, & offsets
337 NameToEnum
** valueNameToEnum
;
338 int32_t* valueNameToEnum_size
;
339 Offset
* valueNameToEnum_offset
;
342 Offset
* nameGroupPool
;
343 int32_t nameGroupPool_count
;
344 int32_t nameGroupPool_size
;
345 Offset nameGroupPool_offset
;
349 int32_t stringPool_count
;
350 int32_t stringPool_size
;
351 Offset stringPool_offset
;
352 Offset
* stringPool_offsetArray
; // relative to stringPool
354 int32_t total_size
; // size of everything
360 Builder(int32_t debugLevel
);
363 void buildTopLevelProperties(const NameToEnumEntry
* propName
,
364 int32_t propNameCount
,
365 const EnumToNameGroupEntry
* propEnum
,
366 int32_t propEnumCount
);
368 void buildValues(const EnumToValueEntry
* e2v
,
371 void buildStringPool(const AliasName
* propertyNames
,
372 int32_t propertyNameCount
,
373 const int32_t* nameGroupIndices
,
374 int32_t nameGroupIndicesCount
);
378 int8_t* createData(int32_t& length
) const;
382 static EnumToOffset
* buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
385 static NonContiguousEnumToOffset
*
386 buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
390 static NonContiguousEnumToOffset
*
391 buildNCEnumToValue(const EnumToValueEntry
* e2v
,
395 static NameToEnum
* buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
399 Offset
stringIndexToOffset(int32_t index
, UBool allowNeg
=FALSE
) const;
400 void fixupNameToEnum(NameToEnum
* n
);
401 void fixupEnumToNameGroup(EnumToOffset
* e2ng
);
402 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
);
404 void computeOffsets();
405 void fixupStringPoolOffsets();
406 void fixupNameGroupPoolOffsets();
407 void fixupMiscellaneousOffsets();
409 static int32_t align(int32_t a
);
410 static void erase(void* p
, int32_t size
);
413 Builder::Builder(int32_t debugLevel
) {
421 valueNCEnumToName
= 0;
422 valueEnumToName_size
= 0;
423 valueEnumToName_offset
= 0;
425 valueNameToEnum_size
= 0;
426 valueNameToEnum_offset
= 0;
429 stringPool_offsetArray
= 0;
// Release the buffers owned by the builder with uprv_free(): the top-level
// maps, every per-value-map table (indexed 0..valueMap_count-1), the
// parallel pointer/size/offset arrays, and the name-group and string pools.
// NOTE(review): the loop dereferences valueEnumToName, valueNCEnumToName and
// valueNameToEnum; if buildValues() was never called these are presumably
// null, so this relies on valueMap_count being 0 in that case -- confirm the
// constructor guarantees it.
// NOTE(review): one line between the enumToValue free and the loop is not
// visible in this excerpt; check that valueMap is released as well.
432 Builder::~Builder() {
433 uprv_free(enumToName
);
434 uprv_free(nameToEnum
);
435 uprv_free(enumToValue
);
437 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
438 uprv_free(valueEnumToName
[i
]);
439 uprv_free(valueNCEnumToName
[i
]);
440 uprv_free(valueNameToEnum
[i
]);
442 uprv_free(valueEnumToName
);
443 uprv_free(valueNCEnumToName
);
444 uprv_free(valueEnumToName_size
);
445 uprv_free(valueEnumToName_offset
);
446 uprv_free(valueNameToEnum
);
447 uprv_free(valueNameToEnum_size
);
448 uprv_free(valueNameToEnum_offset
);
449 uprv_free(nameGroupPool
);
450 uprv_free(stringPool
);
451 uprv_free(stringPool_offsetArray
);
// Round a byte count up for 32-bit alignment: k is the remainder of a modulo
// sizeof(int32_t), and sizeof(int32_t) - k is added to a.
// NOTE(review): the lines between these statements are not visible in this
// excerpt; presumably a is returned unchanged when k == 0 -- confirm.
// NOTE(review): a % sizeof(int32_t) is evaluated in an unsigned type, so a
// negative a would not give the expected remainder -- confirm callers only
// pass non-negative sizes.
454 int32_t Builder::align(int32_t a
) {
456 int32_t k
= a
% sizeof(int32_t);
460 a
+= sizeof(int32_t) - k
;
// Byte-wise helper over the `size` bytes at p, which it views through an
// int8_t pointer (q).
// NOTE(review): the loop body is not visible in this excerpt.  Callers apply
// it to a freshly allocated valueMap and to the header right before filling
// its fields, so it presumably zero-fills the range -- confirm.
464 void Builder::erase(void* p
, int32_t size
) {
466 int8_t* q
= (int8_t*) p
;
472 EnumToOffset
* Builder::buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
475 U_ASSERT(e2ng
->isContiguous(count
));
476 size
= align(EnumToOffset::getSize(count
));
477 EnumToOffset
* result
= (EnumToOffset
*) uprv_malloc(size
);
479 result
->enumStart
= e2ng
->enumValue
;
480 result
->enumLimit
= e2ng
->enumValue
+ count
;
481 Offset
* p
= result
->getOffsetArray();
482 for (int32_t i
=0; i
<count
; ++i
) {
483 // set these to NGI index values
484 // fix them up to NGI offset values
485 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
486 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
491 NonContiguousEnumToOffset
*
492 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
495 U_ASSERT(!e2ng
->isContiguous(count
));
496 size
= align(NonContiguousEnumToOffset::getSize(count
));
497 NonContiguousEnumToOffset
* nc
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
500 EnumValue
* e
= nc
->getEnumArray();
501 Offset
* p
= nc
->getOffsetArray();
502 for (int32_t i
=0; i
<count
; ++i
) {
503 // set these to NGI index values
504 // fix them up to NGI offset values
505 e
[i
] = e2ng
[i
].enumValue
;
506 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
507 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
512 NonContiguousEnumToOffset
*
513 Builder::buildNCEnumToValue(const EnumToValueEntry
* e2v
,
516 U_ASSERT(!e2v
->isContiguous(count
));
517 size
= align(NonContiguousEnumToOffset::getSize(count
));
518 NonContiguousEnumToOffset
* result
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
520 result
->count
= count
;
521 EnumValue
* e
= result
->getEnumArray();
522 for (int32_t i
=0; i
<count
; ++i
) {
523 e
[i
] = e2v
[i
].enumValue
;
524 // offset must be set later
530 * Given an index into the string pool, return an offset. computeOffsets()
531 * must have been called already. If allowNegative is true, allow negatives
532 * and preserve their sign.
534 Offset
Builder::stringIndexToOffset(int32_t index
, UBool allowNegative
) const {
535 // Index 0 is ""; we turn this into an Offset of zero
536 if (index
== 0) return 0;
539 return -Builder::stringIndexToOffset(-index
);
541 die("Negative string pool index");
544 if (index
>= stringPool_count
) {
545 die("String pool index too large");
547 Offset result
= stringPool_offset
+ stringPool_offsetArray
[index
];
548 U_ASSERT(result
>= 0 && result
< total_size
);
551 return 0; // never executed; make compiler happy
554 NameToEnum
* Builder::buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
557 size
= align(NameToEnum::getSize(count
));
558 NameToEnum
* n2e
= (NameToEnum
*) uprv_malloc(size
);
561 Offset
* p
= n2e
->getNameArray();
562 EnumValue
* e
= n2e
->getEnumArray();
563 for (int32_t i
=0; i
<count
; ++i
) {
564 // set these to SP index values
565 // fix them up to SP offset values
566 U_ASSERT(IS_VALID_OFFSET(nameToEnum
[i
].nameIndex
));
567 p
[i
] = (Offset
) nameToEnum
[i
].nameIndex
; // FIXUP later
568 e
[i
] = nameToEnum
[i
].enumValue
;
574 void Builder::buildTopLevelProperties(const NameToEnumEntry
* propName
,
575 int32_t propNameCount
,
576 const EnumToNameGroupEntry
* propEnum
,
577 int32_t propEnumCount
) {
578 enumToName
= buildNCEnumToNameGroup(propEnum
,
581 nameToEnum
= buildNameToEnum(propName
,
586 void Builder::buildValues(const EnumToValueEntry
* e2v
,
590 U_ASSERT(!e2v
->isContiguous(count
));
592 valueMap_count
= count
;
594 enumToValue
= buildNCEnumToValue(e2v
, count
,
597 valueMap_size
= align(count
* sizeof(ValueMap
));
598 valueMap
= (ValueMap
*) uprv_malloc(valueMap_size
);
599 erase(valueMap
, valueMap_size
);
601 valueEnumToName
= MALLOC(EnumToOffset
*, count
);
602 valueNCEnumToName
= MALLOC(NonContiguousEnumToOffset
*, count
);
603 valueEnumToName_size
= MALLOC(int32_t, count
);
604 valueEnumToName_offset
= MALLOC(Offset
, count
);
605 valueNameToEnum
= MALLOC(NameToEnum
*, count
);
606 valueNameToEnum_size
= MALLOC(int32_t, count
);
607 valueNameToEnum_offset
= MALLOC(Offset
, count
);
609 for (i
=0; i
<count
; ++i
) {
611 e2v
[i
].enumToName
->isContiguous(e2v
[i
].enumToName_count
);
612 valueEnumToName
[i
] = 0;
613 valueNCEnumToName
[i
] = 0;
615 valueEnumToName
[i
] = buildEnumToOffset(e2v
[i
].enumToName
,
616 e2v
[i
].enumToName_count
,
617 valueEnumToName_size
[i
]);
619 valueNCEnumToName
[i
] = buildNCEnumToNameGroup(e2v
[i
].enumToName
,
620 e2v
[i
].enumToName_count
,
621 valueEnumToName_size
[i
]);
624 buildNameToEnum(e2v
[i
].nameToEnum
,
625 e2v
[i
].nameToEnum_count
,
626 valueNameToEnum_size
[i
]);
630 void Builder::buildStringPool(const AliasName
* propertyNames
,
631 int32_t propertyNameCount
,
632 const int32_t* nameGroupIndices
,
633 int32_t nameGroupIndicesCount
) {
636 nameGroupPool_count
= nameGroupIndicesCount
;
637 nameGroupPool_size
= sizeof(Offset
) * nameGroupPool_count
;
638 nameGroupPool
= MALLOC(Offset
, nameGroupPool_count
);
640 for (i
=0; i
<nameGroupPool_count
; ++i
) {
641 // Some indices are negative.
642 int32_t a
= nameGroupIndices
[i
];
644 U_ASSERT(IS_VALID_OFFSET(a
));
645 nameGroupPool
[i
] = (Offset
) nameGroupIndices
[i
];
648 stringPool_count
= propertyNameCount
;
650 // first string must be "" -- we skip it
651 U_ASSERT(*propertyNames
[0].str
== 0);
652 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
653 stringPool_size
+= (int32_t)(uprv_strlen(propertyNames
[i
].str
) + 1);
655 stringPool
= MALLOC(char, stringPool_size
);
656 stringPool_offsetArray
= MALLOC(Offset
, stringPool_count
);
658 char* p
= stringPool
;
659 stringPool_offsetArray
[0] = -1; // we don't use this entry
660 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
661 const char* str
= propertyNames
[i
].str
;
662 int32_t len
= (int32_t)uprv_strlen(str
);
666 stringPool_offsetArray
[i
] = soFar
;
667 soFar
+= (Offset
)(len
+1);
669 U_ASSERT(soFar
== stringPool_size
);
670 U_ASSERT(p
== (stringPool
+ stringPool_size
));
673 // Confirm that PropertyAliases is a POD (plain old data; see C++
674 // std). The following union will _fail to compile_ if
675 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
676 // macro to check this, but that's not quite right, so that test is
677 // commented out -- see below.)
681 } PropertyAliasesPODTest
;
683 void Builder::computeOffsets() {
685 Offset off
= sizeof(header
);
688 printf("header \t offset=%4d size=%5d\n", 0, off
);
691 // PropertyAliases must have no v-table and must be
692 // padded (if necessary) to the next 32-bit boundary.
693 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
694 U_ASSERT(sizeof(header
) % sizeof(int32_t) == 0);
696 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
698 #define COMPUTE_OFFSET2(foo,type) \
700 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
702 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
703 U_ASSERT(foo##_offset % sizeof(type) == 0);\
704 off = (Offset) (off + foo##_size);
706 COMPUTE_OFFSET(enumToName
); // 0:
707 COMPUTE_OFFSET(nameToEnum
); // 2:
708 COMPUTE_OFFSET(enumToValue
); // 3:
709 COMPUTE_OFFSET(valueMap
); // 4:
711 for (i
=0; i
<valueMap_count
; ++i
) {
713 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
714 (int)i
, off
, (int)valueEnumToName_size
[i
]);
717 valueEnumToName_offset
[i
] = off
; // 5:
718 U_ASSERT(IS_VALID_OFFSET(off
+ valueEnumToName_size
[i
]));
719 off
= (Offset
) (off
+ valueEnumToName_size
[i
]);
722 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
723 (int)i
, off
, (int)valueNameToEnum_size
[i
]);
726 valueNameToEnum_offset
[i
] = off
; // 6:
727 U_ASSERT(IS_VALID_OFFSET(off
+ valueNameToEnum_size
[i
]));
728 off
= (Offset
) (off
+ valueNameToEnum_size
[i
]);
731 // These last two chunks have weaker alignment needs
732 COMPUTE_OFFSET2(nameGroupPool
,Offset
); // 98:
733 COMPUTE_OFFSET2(stringPool
,char); // 99:
736 if (debug
>0) printf("total size=%5d\n\n", (int)total_size
);
737 U_ASSERT(total_size
<= (MAX_OFFSET
+1));
740 void Builder::fixupNameToEnum(NameToEnum
* n
) {
741 // Fix the string pool offsets in n
742 Offset
* p
= n
->getNameArray();
743 for (int32_t i
=0; i
<n
->count
; ++i
) {
744 p
[i
] = stringIndexToOffset(p
[i
]);
748 void Builder::fixupStringPoolOffsets() {
752 fixupNameToEnum(nameToEnum
);
755 for (i
=0; i
<valueMap_count
; ++i
) {
756 fixupNameToEnum(valueNameToEnum
[i
]);
760 for (i
=0; i
<nameGroupPool_count
; ++i
) {
761 nameGroupPool
[i
] = stringIndexToOffset(nameGroupPool
[i
], TRUE
);
765 void Builder::fixupEnumToNameGroup(EnumToOffset
* e2ng
) {
768 Offset
* p
= e2ng
->getOffsetArray();
769 for (i
=e2ng
->enumStart
, j
=0; i
<e2ng
->enumLimit
; ++i
, ++j
) {
770 p
[j
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[j
];
774 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
) {
776 /*EnumValue* e = e2ng->getEnumArray();*/
777 Offset
* p
= e2ng
->getOffsetArray();
778 for (i
=0; i
<e2ng
->count
; ++i
) {
779 p
[i
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[i
];
783 void Builder::fixupNameGroupPoolOffsets() {
787 fixupNCEnumToNameGroup(enumToName
);
792 for (i
=0; i
<valueMap_count
; ++i
) {
794 if (valueEnumToName
[i
] != 0) {
795 fixupEnumToNameGroup(valueEnumToName
[i
]);
798 if (valueNCEnumToName
[i
] != 0) {
799 fixupNCEnumToNameGroup(valueNCEnumToName
[i
]);
804 void Builder::fixupMiscellaneousOffsets() {
808 erase(&header
, sizeof(header
));
809 header
.enumToName_offset
= enumToName_offset
;
810 header
.nameToEnum_offset
= nameToEnum_offset
;
811 header
.enumToValue_offset
= enumToValue_offset
;
812 // header meta-info used by Java:
813 U_ASSERT(total_size
> 0 && total_size
< 0x7FFF);
814 header
.total_size
= (int16_t) total_size
;
815 header
.valueMap_offset
= valueMap_offset
;
816 header
.valueMap_count
= (int16_t) valueMap_count
;
817 header
.nameGroupPool_offset
= nameGroupPool_offset
;
818 header
.nameGroupPool_count
= (int16_t) nameGroupPool_count
;
819 header
.stringPool_offset
= stringPool_offset
;
820 header
.stringPool_count
= (int16_t) stringPool_count
- 1; // don't include "" entry
822 U_ASSERT(valueMap_count
<= 0x7FFF);
823 U_ASSERT(nameGroupPool_count
<= 0x7FFF);
824 U_ASSERT(stringPool_count
<= 0x7FFF);
827 Offset
* p
= enumToValue
->getOffsetArray();
828 /*EnumValue* e = enumToValue->getEnumArray();*/
829 U_ASSERT(valueMap_count
== enumToValue
->count
);
830 for (i
=0; i
<valueMap_count
; ++i
) {
831 p
[i
] = (Offset
)(valueMap_offset
+ sizeof(ValueMap
) * i
);
835 for (i
=0; i
<valueMap_count
; ++i
) {
836 ValueMap
& v
= valueMap
[i
];
837 v
.enumToName_offset
= v
.ncEnumToName_offset
= 0;
838 if (valueEnumToName
[i
] != 0) {
839 v
.enumToName_offset
= valueEnumToName_offset
[i
];
841 if (valueNCEnumToName
[i
] != 0) {
842 v
.ncEnumToName_offset
= valueEnumToName_offset
[i
];
844 v
.nameToEnum_offset
= valueNameToEnum_offset
[i
];
// Run the offset fix-up passes in order: string pool offsets, then name
// group pool offsets, then the header / value-map offsets.
// NOTE(review): stringIndexToOffset() requires computeOffsets() to have run
// first; that call is not visible in this excerpt -- confirm it happens
// before these passes.
848 void Builder::fixup() {
850 fixupStringPoolOffsets();
851 fixupNameGroupPoolOffsets();
852 fixupMiscellaneousOffsets();
855 int8_t* Builder::createData(int32_t& length
) const {
857 int8_t* result
= MALLOC(int8_t, length
);
860 int8_t* limit
= result
+ length
;
862 #define APPEND2(x, size) \
863 U_ASSERT((p+size)<=limit); \
864 uprv_memcpy(p, x, size); \
867 #define APPEND(x) APPEND2(x, x##_size)
869 APPEND2(&header
, sizeof(header
));
875 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
876 U_ASSERT((valueEnumToName
[i
] != 0 && valueNCEnumToName
[i
] == 0) ||
877 (valueEnumToName
[i
] == 0 && valueNCEnumToName
[i
] != 0));
878 if (valueEnumToName
[i
] != 0) {
879 APPEND2(valueEnumToName
[i
], valueEnumToName_size
[i
]);
881 if (valueNCEnumToName
[i
] != 0) {
882 APPEND2(valueNCEnumToName
[i
], valueEnumToName_size
[i
]);
884 APPEND2(valueNameToEnum
[i
], valueNameToEnum_size
[i
]);
887 APPEND(nameGroupPool
);
891 fprintf(stderr
, "p != limit; p = %p, limit = %p", p
, limit
);
898 //----------------------------------------------------------------------
900 /* UDataInfo cf. udata.h */
901 static UDataInfo dataInfo
= {
910 {PNAME_SIG_0
, PNAME_SIG_1
, PNAME_SIG_2
, PNAME_SIG_3
},
911 {PNAME_FORMAT_VERSION
, 0, 0, 0}, /* formatVersion */
912 {VERSION_0
, VERSION_1
, VERSION_2
, VERSION_3
} /* Unicode version */
917 // command-line options
923 int MMain(int argc
, char *argv
[]);
926 NameToEnumEntry
* createNameIndex(const AliasList
& list
,
927 int32_t& nameIndexCount
);
929 EnumToNameGroupEntry
* createEnumIndex(const AliasList
& list
);
931 int32_t writeDataFile(const char *destdir
, const Builder
&);
934 int main(int argc
, char *argv
[]) {
935 UErrorCode status
= U_ZERO_ERROR
;
937 if (U_FAILURE(status
) && status
!= U_FILE_ACCESS_ERROR
) {
938 // Note: u_init() will try to open ICU property data.
939 // failures here are expected when building ICU from scratch.
941 fprintf(stderr
, "genpname: can not initialize ICU. Status = %s\n",
942 u_errorName(status
));
947 U_MAIN_INIT_ARGS(argc
, argv
);
948 int retVal
= app
.MMain(argc
, argv
);
953 static UOption options
[]={
955 UOPTION_HELP_QUESTION_MARK
,
959 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG
),
962 NameToEnumEntry
* genpname::createNameIndex(const AliasList
& list
,
963 int32_t& nameIndexCount
) {
965 // Build name => enum map
967 // This is an n->1 map. There are typically multiple names
968 // mapping to one enum. The name index is sorted in order of the name,
969 // as defined by the uprv_comparePropertyNames() function.
972 int32_t count
= list
.count();
974 // compute upper limit on number of names in the index
975 int32_t nameIndexCapacity
= count
* MAX_NAMES_PER_GROUP
;
976 NameToEnumEntry
* nameIndex
= MALLOC(NameToEnumEntry
, nameIndexCapacity
);
979 int32_t names
[MAX_NAMES_PER_GROUP
];
980 for (i
=0; i
<count
; ++i
) {
981 const Alias
& p
= list
[i
];
982 int32_t n
= p
.getUniqueNames(names
);
983 for (j
=0; j
<n
; ++j
) {
984 U_ASSERT(nameIndexCount
< nameIndexCapacity
);
985 nameIndex
[nameIndexCount
++] =
986 NameToEnumEntry(names
[j
], p
.enumValue
);
991 * use a stable sort to ensure consistent results between
992 * genpname.cpp and the propname.cpp swapping code
994 UErrorCode errorCode
= U_ZERO_ERROR
;
995 uprv_sortArray(nameIndex
, nameIndexCount
, sizeof(nameIndex
[0]),
996 compareNameToEnumEntry
, NULL
, TRUE
, &errorCode
);
998 printf("Alias names: %d\n", (int)nameIndexCount
);
999 for (i
=0; i
<nameIndexCount
; ++i
) {
1000 printf("%s => %d\n",
1001 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
,
1002 (int)nameIndex
[i
].enumValue
);
1006 // make sure there are no duplicates. for a sorted list we need
1007 // only compare adjacent items. Alias.getUniqueNames() has
1008 // already eliminated duplicate names for a single property, which
1009 // does occur, so we're checking for duplicate names between two
1010 // properties, which should never occur.
1012 for (i
=1; i
<nameIndexCount
; ++i
) {
1013 if (STRING_TABLE
[nameIndex
[i
-1].nameIndex
] ==
1014 STRING_TABLE
[nameIndex
[i
].nameIndex
]) {
1015 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1016 STRING_TABLE
[nameIndex
[i
-1].nameIndex
].str
,
1017 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
);
1022 die("Two or more duplicate names in property list");
1028 EnumToNameGroupEntry
* genpname::createEnumIndex(const AliasList
& list
) {
1030 // Build the enum => name map
1032 // This is a 1->n map. Each enum maps to 1 or more names. To
1033 // accomplish this the index entry points to an element of the
1034 // NAME_GROUP array. This is the short name (which may be empty).
1035 // From there, subsequent elements of NAME_GROUP are alternate
1036 // names for this enum, up to and including the first one that is
1037 // negative (negate for actual index).
1040 int32_t count
= list
.count();
1042 EnumToNameGroupEntry
* enumIndex
= MALLOC(EnumToNameGroupEntry
, count
);
1043 for (i
=0; i
<count
; ++i
) {
1044 const Alias
& p
= list
[i
];
1045 enumIndex
[i
] = EnumToNameGroupEntry(p
.enumValue
, p
.nameGroupIndex
);
1048 UErrorCode errorCode
= U_ZERO_ERROR
;
1049 uprv_sortArray(enumIndex
, count
, sizeof(enumIndex
[0]),
1050 compareEnumToNameGroupEntry
, NULL
, FALSE
, &errorCode
);
1052 printf("Property enums: %d\n", (int)count
);
1053 for (i
=0; i
<count
; ++i
) {
1054 printf("%d => %d: ",
1055 (int)enumIndex
[i
].enumValue
,
1056 (int)enumIndex
[i
].nameGroupIndex
);
1058 for (j
=enumIndex
[i
].nameGroupIndex
; !done
; ++j
) {
1064 printf("\"%s\"", STRING_TABLE
[k
].str
);
1065 if (!done
) printf(", ");
1074 int genpname::MMain(int argc
, char* argv
[])
1077 UErrorCode status
= U_ZERO_ERROR
;
1080 if (U_FAILURE(status
) && status
!= U_FILE_ACCESS_ERROR
) {
1081 fprintf(stderr
, "Error: u_init returned %s\n", u_errorName(status
));
1082 status
= U_ZERO_ERROR
;
1086 /* preset then read command line options */
1087 options
[3].value
=u_getDataDirectory();
1088 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
1090 /* error handling, printing usage message */
1093 "error in command line argument \"%s\"\n",
1097 debug
= options
[5].doesOccur
? (*options
[5].value
- '0') : 0;
1099 if (argc
!=1 || options
[0].doesOccur
|| options
[1].doesOccur
||
1100 debug
< 0 || debug
> 9) {
1102 "usage: %s [-options]\n"
1103 "\tcreate " PNAME_DATA_NAME
"." PNAME_DATA_TYPE
"\n"
1105 "\t-h or -? or --help this usage text\n"
1106 "\t-v or --verbose turn on verbose output\n"
1107 "\t-c or --copyright include a copyright notice\n"
1108 "\t-d or --destdir destination directory, followed by the path\n"
1109 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1111 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
1114 /* get the options values */
1115 useCopyright
=options
[2].doesOccur
;
1116 verbose
= options
[4].doesOccur
;
1118 // ------------------------------------------------------------
1119 // Do not sort the string table, instead keep it in data.h order.
1120 // This simplifies data swapping and testing thereof because the string
1121 // table itself need not be sorted during swapping.
1122 // The NameToEnum sorter sorts each such map's string offsets instead.
1125 printf("String pool: %d\n", (int)STRING_COUNT
);
1126 for (i
=0; i
<STRING_COUNT
; ++i
) {
1130 printf("%s (%d)", STRING_TABLE
[i
].str
, (int)STRING_TABLE
[i
].index
);
1135 // ------------------------------------------------------------
1136 // Create top-level property indices
1138 PropertyArrayList
props(PROPERTY
, PROPERTY_COUNT
);
1139 int32_t propNameCount
;
1140 NameToEnumEntry
* propName
= createNameIndex(props
, propNameCount
);
1141 EnumToNameGroupEntry
* propEnum
= createEnumIndex(props
);
1143 // ------------------------------------------------------------
1144 // Create indices for the value list for each enumerated property
1146 // This will have more entries than we need...
1147 EnumToValueEntry
* enumToValue
= MALLOC(EnumToValueEntry
, PROPERTY_COUNT
);
1148 int32_t enumToValue_count
= 0;
1149 for (i
=0, j
=0; i
<PROPERTY_COUNT
; ++i
) {
1150 if (PROPERTY
[i
].valueCount
== 0) continue;
1151 AliasArrayList
values(PROPERTY
[i
].valueList
,
1152 PROPERTY
[i
].valueCount
);
1153 enumToValue
[j
].enumValue
= PROPERTY
[i
].enumValue
;
1154 enumToValue
[j
].enumToName
= createEnumIndex(values
);
1155 enumToValue
[j
].enumToName_count
= PROPERTY
[i
].valueCount
;
1156 enumToValue
[j
].nameToEnum
= createNameIndex(values
,
1157 enumToValue
[j
].nameToEnum_count
);
1160 enumToValue_count
= j
;
1162 uprv_sortArray(enumToValue
, enumToValue_count
, sizeof(enumToValue
[0]),
1163 compareEnumToValueEntry
, NULL
, FALSE
, &status
);
1165 // ------------------------------------------------------------
1166 // Build PropertyAliases layout in memory
1168 Builder
builder(debug
);
1170 builder
.buildTopLevelProperties(propName
,
1175 builder
.buildValues(enumToValue
,
1178 builder
.buildStringPool(STRING_TABLE
,
1185 ////////////////////////////////////////////////////////////
1186 // Write the output file
1187 ////////////////////////////////////////////////////////////
1188 int32_t wlen
= writeDataFile(options
[3].value
, builder
);
1190 fprintf(stdout
, "Output file: %s.%s, %ld bytes\n",
1191 U_ICUDATA_NAME
"_" PNAME_DATA_NAME
, PNAME_DATA_TYPE
, (long)wlen
);
1194 return 0; // success
1197 int32_t genpname::writeDataFile(const char *destdir
, const Builder
& builder
) {
1199 int8_t* data
= builder
.createData(length
);
1201 UNewDataMemory
*pdata
;
1202 UErrorCode status
= U_ZERO_ERROR
;
1204 pdata
= udata_create(destdir
, PNAME_DATA_TYPE
, PNAME_DATA_NAME
, &dataInfo
,
1205 useCopyright
? U_COPYRIGHT_STRING
: 0, &status
);
1206 if (U_FAILURE(status
)) {
1207 die("Unable to create data memory");
1210 udata_writeBlock(pdata
, data
, length
);
1212 int32_t dataLength
= (int32_t) udata_finish(pdata
, &status
);
1213 if (U_FAILURE(status
)) {
1214 die("Error writing output file");
1216 if (dataLength
!= length
) {
1217 die("Written file doesn't match expected size");