2 **********************************************************************
3 * Copyright (C) 2002-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
26 // TODO: Clean up and comment this code.
28 //----------------------------------------------------------------------
31 // This is the raw data to be output. We define the data structure,
32 // then include a machine-generated header that contains the actual data.
35 #include "unicode/uchar.h"
36 #include "unicode/uscript.h"
37 #include "unicode/unorm.h"
44 AliasName(const char* str
, int32_t index
);
46 int compare(const AliasName
& other
) const;
48 UBool
operator==(const AliasName
& other
) const {
49 return compare(other
) == 0;
52 UBool
operator!=(const AliasName
& other
) const {
53 return compare(other
) != 0;
57 AliasName::AliasName(const char* _str
,
// Three-way comparison of two alias names. Delegates to
// uprv_comparePropertyNames() on the two underlying strings and returns its
// result unchanged; operator== and operator!= test that result against 0.
64 int AliasName::compare(const AliasName
& other
) const {
65 return uprv_comparePropertyNames(str
, other
.str
);
71 int32_t nameGroupIndex
;
73 Alias(int32_t enumValue
,
74 int32_t nameGroupIndex
);
76 int32_t getUniqueNames(int32_t* nameGroupIndices
) const;
79 Alias::Alias(int32_t anEnumValue
,
80 int32_t aNameGroupIndex
) :
81 enumValue(anEnumValue
),
82 nameGroupIndex(aNameGroupIndex
)
86 class Property
: public Alias
{
89 const Alias
* valueList
;
91 Property(int32_t enumValue
,
92 int32_t nameGroupIndex
,
94 const Alias
* valueList
);
97 Property::Property(int32_t _enumValue
,
98 int32_t _nameGroupIndex
,
100 const Alias
* _valueList
) :
101 Alias(_enumValue
, _nameGroupIndex
),
102 valueCount(_valueCount
),
103 valueList(_valueList
)
107 // *** Include the data header ***
110 /* return a list of unique names, not including "", for this property
111 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
112 * elements, will be filled with indices into STRING_TABLE
113 * @return number of indices, >= 1
115 int32_t Alias::getUniqueNames(int32_t* stringIndices
) const {
117 int32_t i
= nameGroupIndex
;
120 int32_t j
= NAME_GROUP
[i
++];
125 if (j
== 0) continue; // omit "" entries
127 for (int32_t k
=0; k
<count
; ++k
) {
128 if (stringIndices
[k
] == j
) {
132 // also do a string check for things like "age|Age"
133 if (STRING_TABLE
[stringIndices
[k
]] == STRING_TABLE
[j
]) {
134 //printf("Found dupe %s|%s\n",
135 // STRING_TABLE[stringIndices[k]].str,
136 // STRING_TABLE[j].str);
141 if (dupe
) continue; // omit duplicates
142 stringIndices
[count
++] = j
;
148 //----------------------------------------------------------------------
// Allocate uninitialized storage for `count` objects of `type` with
// uprv_malloc() and cast the result to `type*`.
// `count` is parenthesized so an expression argument such as `n + 1` is
// multiplied as a whole (otherwise sizeof(type) * n + 1 bytes would be
// requested), and the full expansion is parenthesized so the macro can be
// used safely inside a larger expression.
// `type` cannot be parenthesized because it is used as a type name.
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
// Report a fatal error: writes "Error: <msg>" followed by a newline to stderr.
// NOTE(review): callers are written as if die() never returns (see the
// "never executed" return in stringIndexToOffset); the statement that ends
// the program is not visible here -- confirm.
153 void die(const char* msg
) {
154 fprintf(stderr
, "Error: %s\n", msg
);
158 //----------------------------------------------------------------------
161 * A list of Alias objects.
165 virtual const Alias
& operator[](int32_t i
) const = 0;
166 virtual int32_t count() const = 0;
172 class AliasArrayList
: public AliasList
{
176 AliasArrayList(const Alias
* _a
, int32_t _n
) {
180 virtual const Alias
& operator[](int32_t i
) const {
183 virtual int32_t count() const {
191 class PropertyArrayList
: public AliasList
{
195 PropertyArrayList(const Property
* _a
, int32_t _n
) {
199 virtual const Alias
& operator[](int32_t i
) const {
202 virtual int32_t count() const {
207 //----------------------------------------------------------------------
210 * An element in a name index. It maps a name (given by index) into an enum value.
213 class NameToEnumEntry
{
// Construct an entry: a is stored as nameIndex, b as enumValue.
217 NameToEnumEntry(int32_t a
, int32_t b
) { nameIndex
=a
; enumValue
=b
; }
220 // Sort function for NameToEnumEntry (sort by name)
222 compareNameToEnumEntry(const void * /*context*/, const void* e1
, const void* e2
) {
224 STRING_TABLE
[((NameToEnumEntry
*)e1
)->nameIndex
].
225 compare(STRING_TABLE
[((NameToEnumEntry
*)e2
)->nameIndex
]);
228 //----------------------------------------------------------------------
231 * An element in an enum index. It maps an enum into a name group entry
234 class EnumToNameGroupEntry
{
237 int32_t nameGroupIndex
;
// Construct an entry: a is stored as enumValue, b as nameGroupIndex.
238 EnumToNameGroupEntry(int32_t a
, int32_t b
) { enumValue
=a
; nameGroupIndex
=b
; }
240 // are enumValues contiguous for count entries starting with this one?
241 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
242 UBool
isContiguous(int32_t count
) const {
243 const EnumToNameGroupEntry
* p
= this;
244 for (int32_t i
=1; i
<count
; ++i
) {
245 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
253 // Sort function for EnumToNameGroupEntry (sort by enum value)
255 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1
, const void* e2
) {
256 return ((EnumToNameGroupEntry
*)e1
)->enumValue
- ((EnumToNameGroupEntry
*)e2
)->enumValue
;
259 //----------------------------------------------------------------------
262 * An element in the map from enumerated property enums to value maps.
264 class EnumToValueEntry
{
267 EnumToNameGroupEntry
* enumToName
;
268 int32_t enumToName_count
;
269 NameToEnumEntry
* nameToEnum
;
270 int32_t nameToEnum_count
;
272 // are enumValues contiguous for count entries starting with this one?
273 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
274 UBool
isContiguous(int32_t count
) const {
275 const EnumToValueEntry
* p
= this;
276 for (int32_t i
=1; i
<count
; ++i
) {
277 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
285 // Sort function for EnumToValueEntry (sort by enum)
287 compareEnumToValueEntry(const void * /*context*/, const void* e1
, const void* e2
) {
288 return ((EnumToValueEntry
*)e1
)->enumValue
- ((EnumToValueEntry
*)e2
)->enumValue
;
291 //----------------------------------------------------------------------
// True when x lies in the closed range [0, MAX_OFFSET]. Used in U_ASSERT
// checks before an index or size is narrowed to Offset.
// Note: x is evaluated twice, so pass only side-effect-free expressions.
294 #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
298 PropertyAliases header
;
301 NonContiguousEnumToOffset
* enumToName
;
302 int32_t enumToName_size
;
303 Offset enumToName_offset
;
308 NameToEnum
* nameToEnum
;
309 int32_t nameToEnum_size
;
310 Offset nameToEnum_offset
;
313 NonContiguousEnumToOffset
* enumToValue
;
314 int32_t enumToValue_size
;
315 Offset enumToValue_offset
;
319 int32_t valueMap_size
;
320 int32_t valueMap_count
;
321 Offset valueMap_offset
;
323 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
324 // NULL and one is not. valueEnumToName_size[i] is the size of
325 // the non-NULL one. i=0..valueMap_count-1
327 EnumToOffset
** valueEnumToName
;
329 NonContiguousEnumToOffset
** valueNCEnumToName
;
330 int32_t* valueEnumToName_size
;
331 Offset
* valueEnumToName_offset
;
333 // arrays of valueMap_count pointers, sizes, & offsets
334 NameToEnum
** valueNameToEnum
;
335 int32_t* valueNameToEnum_size
;
336 Offset
* valueNameToEnum_offset
;
339 Offset
* nameGroupPool
;
340 int32_t nameGroupPool_count
;
341 int32_t nameGroupPool_size
;
342 Offset nameGroupPool_offset
;
346 int32_t stringPool_count
;
347 int32_t stringPool_size
;
348 Offset stringPool_offset
;
349 Offset
* stringPool_offsetArray
; // relative to stringPool
351 int32_t total_size
; // size of everything
357 Builder(int32_t debugLevel
);
360 void buildTopLevelProperties(const NameToEnumEntry
* propName
,
361 int32_t propNameCount
,
362 const EnumToNameGroupEntry
* propEnum
,
363 int32_t propEnumCount
);
365 void buildValues(const EnumToValueEntry
* e2v
,
368 void buildStringPool(const AliasName
* propertyNames
,
369 int32_t propertyNameCount
,
370 const int32_t* nameGroupIndices
,
371 int32_t nameGroupIndicesCount
);
375 int8_t* createData(int32_t& length
) const;
379 static EnumToOffset
* buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
382 static NonContiguousEnumToOffset
*
383 buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
387 static NonContiguousEnumToOffset
*
388 buildNCEnumToValue(const EnumToValueEntry
* e2v
,
392 static NameToEnum
* buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
396 Offset
stringIndexToOffset(int32_t index
, UBool allowNeg
=FALSE
) const;
397 void fixupNameToEnum(NameToEnum
* n
);
398 void fixupEnumToNameGroup(EnumToOffset
* e2ng
);
399 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
);
401 void computeOffsets();
402 void fixupStringPoolOffsets();
403 void fixupNameGroupPoolOffsets();
404 void fixupMiscellaneousOffsets();
406 static int32_t align(int32_t a
);
407 static void erase(void* p
, int32_t size
);
410 Builder::Builder(int32_t debugLevel
) {
418 valueNCEnumToName
= 0;
419 valueEnumToName_size
= 0;
420 valueEnumToName_offset
= 0;
422 valueNameToEnum_size
= 0;
423 valueNameToEnum_offset
= 0;
426 stringPool_offsetArray
= 0;
// Destructor: releases the builder's buffers with uprv_free().
429 Builder::~Builder() {
// Top-level property tables.
430 uprv_free(enumToName
);
431 uprv_free(nameToEnum
);
432 uprv_free(enumToValue
);
// Per-value-map tables: free each element before the arrays that hold the
// pointers are themselves freed below.
434 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
435 uprv_free(valueEnumToName
[i
]);
436 uprv_free(valueNCEnumToName
[i
]);
437 uprv_free(valueNameToEnum
[i
]);
// Arrays of per-value-map pointers, sizes and offsets.
439 uprv_free(valueEnumToName
);
440 uprv_free(valueNCEnumToName
);
441 uprv_free(valueEnumToName_size
);
442 uprv_free(valueEnumToName_offset
);
443 uprv_free(valueNameToEnum
);
444 uprv_free(valueNameToEnum_size
);
445 uprv_free(valueNameToEnum_offset
);
// Name group pool, string pool, and the string offset table.
446 uprv_free(nameGroupPool
);
447 uprv_free(stringPool
);
448 uprv_free(stringPool_offsetArray
);
// Alignment helper used when computing chunk sizes.
// From the visible statements: k is the remainder of a modulo
// sizeof(int32_t), and a is advanced by sizeof(int32_t) - k, which moves it
// to the next multiple of sizeof(int32_t).
// NOTE(review): the condition guarding the adjustment and the return
// statement are not visible here; presumably an already-aligned value is
// returned unchanged -- confirm.
451 int32_t Builder::align(int32_t a
) {
453 int32_t k
= a
% sizeof(int32_t);
457 a
+= sizeof(int32_t) - k
;
461 void Builder::erase(void* p
, int32_t size
) {
463 int8_t* q
= (int8_t*) p
;
469 EnumToOffset
* Builder::buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
472 U_ASSERT(e2ng
->isContiguous(count
));
473 size
= align(EnumToOffset::getSize(count
));
474 EnumToOffset
* result
= (EnumToOffset
*) uprv_malloc(size
);
476 result
->enumStart
= e2ng
->enumValue
;
477 result
->enumLimit
= e2ng
->enumValue
+ count
;
478 Offset
* p
= result
->getOffsetArray();
479 for (int32_t i
=0; i
<count
; ++i
) {
480 // set these to NGI index values
481 // fix them up to NGI offset values
482 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
483 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
488 NonContiguousEnumToOffset
*
489 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
492 U_ASSERT(!e2ng
->isContiguous(count
));
493 size
= align(NonContiguousEnumToOffset::getSize(count
));
494 NonContiguousEnumToOffset
* nc
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
497 EnumValue
* e
= nc
->getEnumArray();
498 Offset
* p
= nc
->getOffsetArray();
499 for (int32_t i
=0; i
<count
; ++i
) {
500 // set these to NGI index values
501 // fix them up to NGI offset values
502 e
[i
] = e2ng
[i
].enumValue
;
503 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
504 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
509 NonContiguousEnumToOffset
*
510 Builder::buildNCEnumToValue(const EnumToValueEntry
* e2v
,
513 U_ASSERT(!e2v
->isContiguous(count
));
514 size
= align(NonContiguousEnumToOffset::getSize(count
));
515 NonContiguousEnumToOffset
* result
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
517 result
->count
= count
;
518 EnumValue
* e
= result
->getEnumArray();
519 for (int32_t i
=0; i
<count
; ++i
) {
520 e
[i
] = e2v
[i
].enumValue
;
521 // offset must be set later
527 * Given an index into the string pool, return an offset. computeOffsets()
528 * must have been called already. If allowNegative is true, allow negatives
529 * and preserve their sign.
531 Offset
Builder::stringIndexToOffset(int32_t index
, UBool allowNegative
) const {
532 // Index 0 is ""; we turn this into an Offset of zero
533 if (index
== 0) return 0;
536 return -Builder::stringIndexToOffset(-index
);
538 die("Negative string pool index");
541 if (index
>= stringPool_count
) {
542 die("String pool index too large");
544 Offset result
= stringPool_offset
+ stringPool_offsetArray
[index
];
545 U_ASSERT(result
>= 0 && result
< total_size
);
548 return 0; // never executed; make compiler happy
551 NameToEnum
* Builder::buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
554 size
= align(NameToEnum::getSize(count
));
555 NameToEnum
* n2e
= (NameToEnum
*) uprv_malloc(size
);
558 Offset
* p
= n2e
->getNameArray();
559 EnumValue
* e
= n2e
->getEnumArray();
560 for (int32_t i
=0; i
<count
; ++i
) {
561 // set these to SP index values
562 // fix them up to SP offset values
563 U_ASSERT(IS_VALID_OFFSET(nameToEnum
[i
].nameIndex
));
564 p
[i
] = (Offset
) nameToEnum
[i
].nameIndex
; // FIXUP later
565 e
[i
] = nameToEnum
[i
].enumValue
;
571 void Builder::buildTopLevelProperties(const NameToEnumEntry
* propName
,
572 int32_t propNameCount
,
573 const EnumToNameGroupEntry
* propEnum
,
574 int32_t propEnumCount
) {
575 enumToName
= buildNCEnumToNameGroup(propEnum
,
578 nameToEnum
= buildNameToEnum(propName
,
583 void Builder::buildValues(const EnumToValueEntry
* e2v
,
587 U_ASSERT(!e2v
->isContiguous(count
));
589 valueMap_count
= count
;
591 enumToValue
= buildNCEnumToValue(e2v
, count
,
594 valueMap_size
= align(count
* sizeof(ValueMap
));
595 valueMap
= (ValueMap
*) uprv_malloc(valueMap_size
);
596 erase(valueMap
, valueMap_size
);
598 valueEnumToName
= MALLOC(EnumToOffset
*, count
);
599 valueNCEnumToName
= MALLOC(NonContiguousEnumToOffset
*, count
);
600 valueEnumToName_size
= MALLOC(int32_t, count
);
601 valueEnumToName_offset
= MALLOC(Offset
, count
);
602 valueNameToEnum
= MALLOC(NameToEnum
*, count
);
603 valueNameToEnum_size
= MALLOC(int32_t, count
);
604 valueNameToEnum_offset
= MALLOC(Offset
, count
);
606 for (i
=0; i
<count
; ++i
) {
608 e2v
[i
].enumToName
->isContiguous(e2v
[i
].enumToName_count
);
609 valueEnumToName
[i
] = 0;
610 valueNCEnumToName
[i
] = 0;
612 valueEnumToName
[i
] = buildEnumToOffset(e2v
[i
].enumToName
,
613 e2v
[i
].enumToName_count
,
614 valueEnumToName_size
[i
]);
616 valueNCEnumToName
[i
] = buildNCEnumToNameGroup(e2v
[i
].enumToName
,
617 e2v
[i
].enumToName_count
,
618 valueEnumToName_size
[i
]);
621 buildNameToEnum(e2v
[i
].nameToEnum
,
622 e2v
[i
].nameToEnum_count
,
623 valueNameToEnum_size
[i
]);
627 void Builder::buildStringPool(const AliasName
* propertyNames
,
628 int32_t propertyNameCount
,
629 const int32_t* nameGroupIndices
,
630 int32_t nameGroupIndicesCount
) {
633 nameGroupPool_count
= nameGroupIndicesCount
;
634 nameGroupPool_size
= sizeof(Offset
) * nameGroupPool_count
;
635 nameGroupPool
= MALLOC(Offset
, nameGroupPool_count
);
637 for (i
=0; i
<nameGroupPool_count
; ++i
) {
638 // Some indices are negative.
639 int32_t a
= nameGroupIndices
[i
];
641 U_ASSERT(IS_VALID_OFFSET(a
));
642 nameGroupPool
[i
] = (Offset
) nameGroupIndices
[i
];
645 stringPool_count
= propertyNameCount
;
647 // first string must be "" -- we skip it
648 U_ASSERT(*propertyNames
[0].str
== 0);
649 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
650 stringPool_size
+= (int32_t)(uprv_strlen(propertyNames
[i
].str
) + 1);
652 stringPool
= MALLOC(char, stringPool_size
);
653 stringPool_offsetArray
= MALLOC(Offset
, stringPool_count
);
655 char* p
= stringPool
;
656 stringPool_offsetArray
[0] = -1; // we don't use this entry
657 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
658 const char* str
= propertyNames
[i
].str
;
659 int32_t len
= (int32_t)uprv_strlen(str
);
663 stringPool_offsetArray
[i
] = soFar
;
664 soFar
+= (Offset
)(len
+1);
666 U_ASSERT(soFar
== stringPool_size
);
667 U_ASSERT(p
== (stringPool
+ stringPool_size
));
670 // Confirm that PropertyAliases is a POD (plain old data; see C++
671 // std). The following union will _fail to compile_ if
672 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
673 // macro to check this, but that's not quite right, so that test is
674 // commented out -- see below.)
678 } PropertyAliasesPODTest
;
680 void Builder::computeOffsets() {
682 Offset off
= sizeof(header
);
685 printf("header \t offset=%4d size=%5d\n", 0, off
);
688 // PropertyAliases must have no v-table and must be
689 // padded (if necessary) to the next 32-bit boundary.
690 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
691 U_ASSERT(sizeof(header
) % sizeof(int32_t) == 0);
693 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
695 #define COMPUTE_OFFSET2(foo,type) \
697 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
699 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
700 U_ASSERT(foo##_offset % sizeof(type) == 0);\
701 off = (Offset) (off + foo##_size);
703 COMPUTE_OFFSET(enumToName
); // 0:
704 COMPUTE_OFFSET(nameToEnum
); // 2:
705 COMPUTE_OFFSET(enumToValue
); // 3:
706 COMPUTE_OFFSET(valueMap
); // 4:
708 for (i
=0; i
<valueMap_count
; ++i
) {
710 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
711 (int)i
, off
, (int)valueEnumToName_size
[i
]);
714 valueEnumToName_offset
[i
] = off
; // 5:
715 U_ASSERT(IS_VALID_OFFSET(off
+ valueEnumToName_size
[i
]));
716 off
= (Offset
) (off
+ valueEnumToName_size
[i
]);
719 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
720 (int)i
, off
, (int)valueNameToEnum_size
[i
]);
723 valueNameToEnum_offset
[i
] = off
; // 6:
724 U_ASSERT(IS_VALID_OFFSET(off
+ valueNameToEnum_size
[i
]));
725 off
= (Offset
) (off
+ valueNameToEnum_size
[i
]);
728 // These last two chunks have weaker alignment needs
729 COMPUTE_OFFSET2(nameGroupPool
,Offset
); // 98:
730 COMPUTE_OFFSET2(stringPool
,char); // 99:
733 if (debug
>0) printf("total size=%5d\n\n", (int)total_size
);
734 U_ASSERT(total_size
<= (MAX_OFFSET
+1));
// Fix-up pass over one NameToEnum table. buildNameToEnum() stores string
// pool *indices* in the name array (marked "FIXUP later"); this rewrites
// each of the n->count slots in place with the Offset returned by
// stringIndexToOffset(). Negative indices are not allowed here, because the
// allowNeg argument is left at its default.
737 void Builder::fixupNameToEnum(NameToEnum
* n
) {
738 // Fix the string pool offsets in n
739 Offset
* p
= n
->getNameArray();
740 for (int32_t i
=0; i
<n
->count
; ++i
) {
741 p
[i
] = stringIndexToOffset(p
[i
]);
745 void Builder::fixupStringPoolOffsets() {
749 fixupNameToEnum(nameToEnum
);
752 for (i
=0; i
<valueMap_count
; ++i
) {
753 fixupNameToEnum(valueNameToEnum
[i
]);
757 for (i
=0; i
<nameGroupPool_count
; ++i
) {
758 nameGroupPool
[i
] = stringIndexToOffset(nameGroupPool
[i
], TRUE
);
// Fix-up pass over one contiguous EnumToOffset table. buildEnumToOffset()
// stores name group *indices* in the offset array; this rewrites each slot
// in place as nameGroupPool_offset + sizeof(Offset) * index.
// One slot is visited per enum value in [enumStart, enumLimit); j is the
// slot position that runs alongside the enum value i.
762 void Builder::fixupEnumToNameGroup(EnumToOffset
* e2ng
) {
765 Offset
* p
= e2ng
->getOffsetArray();
766 for (i
=e2ng
->enumStart
, j
=0; i
<e2ng
->enumLimit
; ++i
, ++j
) {
767 p
[j
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[j
];
// Fix-up pass over one NonContiguousEnumToOffset table.
// buildNCEnumToNameGroup() stores name group *indices* in the offset array;
// this rewrites each of the e2ng->count slots in place as
// nameGroupPool_offset + sizeof(Offset) * index. The enum array is left
// untouched.
771 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
) {
773 /*EnumValue* e = e2ng->getEnumArray();*/
774 Offset
* p
= e2ng
->getOffsetArray();
775 for (i
=0; i
<e2ng
->count
; ++i
) {
776 p
[i
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[i
];
780 void Builder::fixupNameGroupPoolOffsets() {
784 fixupNCEnumToNameGroup(enumToName
);
789 for (i
=0; i
<valueMap_count
; ++i
) {
791 if (valueEnumToName
[i
] != 0) {
792 fixupEnumToNameGroup(valueEnumToName
[i
]);
795 if (valueNCEnumToName
[i
] != 0) {
796 fixupNCEnumToNameGroup(valueNCEnumToName
[i
]);
801 void Builder::fixupMiscellaneousOffsets() {
805 erase(&header
, sizeof(header
));
806 header
.enumToName_offset
= enumToName_offset
;
807 header
.nameToEnum_offset
= nameToEnum_offset
;
808 header
.enumToValue_offset
= enumToValue_offset
;
809 // header meta-info used by Java:
810 U_ASSERT(total_size
> 0 && total_size
< 0x7FFF);
811 header
.total_size
= (int16_t) total_size
;
812 header
.valueMap_offset
= valueMap_offset
;
813 header
.valueMap_count
= (int16_t) valueMap_count
;
814 header
.nameGroupPool_offset
= nameGroupPool_offset
;
815 header
.nameGroupPool_count
= (int16_t) nameGroupPool_count
;
816 header
.stringPool_offset
= stringPool_offset
;
817 header
.stringPool_count
= (int16_t) stringPool_count
- 1; // don't include "" entry
819 U_ASSERT(valueMap_count
<= 0x7FFF);
820 U_ASSERT(nameGroupPool_count
<= 0x7FFF);
821 U_ASSERT(stringPool_count
<= 0x7FFF);
824 Offset
* p
= enumToValue
->getOffsetArray();
825 /*EnumValue* e = enumToValue->getEnumArray();*/
826 U_ASSERT(valueMap_count
== enumToValue
->count
);
827 for (i
=0; i
<valueMap_count
; ++i
) {
828 p
[i
] = (Offset
)(valueMap_offset
+ sizeof(ValueMap
) * i
);
832 for (i
=0; i
<valueMap_count
; ++i
) {
833 ValueMap
& v
= valueMap
[i
];
834 v
.enumToName_offset
= v
.ncEnumToName_offset
= 0;
835 if (valueEnumToName
[i
] != 0) {
836 v
.enumToName_offset
= valueEnumToName_offset
[i
];
838 if (valueNCEnumToName
[i
] != 0) {
839 v
.ncEnumToName_offset
= valueEnumToName_offset
[i
];
841 v
.nameToEnum_offset
= valueNameToEnum_offset
[i
];
// Runs the offset fix-up passes. The visible calls are, in order: string
// pool offsets, name group pool offsets, then the header / value-map
// offsets.
// NOTE(review): stringIndexToOffset() documents that computeOffsets() must
// already have been called; whether fixup() makes that call itself is not
// visible here -- confirm.
845 void Builder::fixup() {
847 fixupStringPoolOffsets();
848 fixupNameGroupPoolOffsets();
849 fixupMiscellaneousOffsets();
852 int8_t* Builder::createData(int32_t& length
) const {
854 int8_t* result
= MALLOC(int8_t, length
);
857 int8_t* limit
= result
+ length
;
859 #define APPEND2(x, size) \
860 U_ASSERT((p+size)<=limit); \
861 uprv_memcpy(p, x, size); \
864 #define APPEND(x) APPEND2(x, x##_size)
866 APPEND2(&header
, sizeof(header
));
872 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
873 U_ASSERT((valueEnumToName
[i
] != 0 && valueNCEnumToName
[i
] == 0) ||
874 (valueEnumToName
[i
] == 0 && valueNCEnumToName
[i
] != 0));
875 if (valueEnumToName
[i
] != 0) {
876 APPEND2(valueEnumToName
[i
], valueEnumToName_size
[i
]);
878 if (valueNCEnumToName
[i
] != 0) {
879 APPEND2(valueNCEnumToName
[i
], valueEnumToName_size
[i
]);
881 APPEND2(valueNameToEnum
[i
], valueNameToEnum_size
[i
]);
884 APPEND(nameGroupPool
);
888 fprintf(stderr
, "p != limit; p = %p, limit = %p", p
, limit
);
895 //----------------------------------------------------------------------
897 /* UDataInfo cf. udata.h */
898 static UDataInfo dataInfo
= {
907 {PNAME_SIG_0
, PNAME_SIG_1
, PNAME_SIG_2
, PNAME_SIG_3
},
908 {PNAME_FORMAT_VERSION
, 0, 0, 0}, /* formatVersion */
909 {VERSION_0
, VERSION_1
, VERSION_2
, VERSION_3
} /* Unicode version */
914 // command-line options
920 int MMain(int argc
, char *argv
[]);
923 NameToEnumEntry
* createNameIndex(const AliasList
& list
,
924 int32_t& nameIndexCount
);
926 EnumToNameGroupEntry
* createEnumIndex(const AliasList
& list
);
928 int32_t writeDataFile(const char *destdir
, const Builder
&);
931 int main(int argc
, char *argv
[]) {
932 UErrorCode status
= U_ZERO_ERROR
;
934 if (U_FAILURE(status
) && status
!= U_FILE_ACCESS_ERROR
) {
935 // Note: u_init() will try to open ICU property data.
936 // failures here are expected when building ICU from scratch.
938 fprintf(stderr
, "genpname: can not initialize ICU. Status = %s\n",
939 u_errorName(status
));
944 U_MAIN_INIT_ARGS(argc
, argv
);
945 int retVal
= app
.MMain(argc
, argv
);
950 static UOption options
[]={
952 UOPTION_HELP_QUESTION_MARK
,
956 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG
),
959 NameToEnumEntry
* genpname::createNameIndex(const AliasList
& list
,
960 int32_t& nameIndexCount
) {
962 // Build name => enum map
964 // This is an n->1 map. There are typically multiple names
965 // mapping to one enum. The name index is sorted in order of the name,
966 // as defined by the uprv_comparePropertyNames() function.
969 int32_t count
= list
.count();
971 // compute upper limit on number of names in the index
972 int32_t nameIndexCapacity
= count
* MAX_NAMES_PER_GROUP
;
973 NameToEnumEntry
* nameIndex
= MALLOC(NameToEnumEntry
, nameIndexCapacity
);
976 int32_t names
[MAX_NAMES_PER_GROUP
];
977 for (i
=0; i
<count
; ++i
) {
978 const Alias
& p
= list
[i
];
979 int32_t n
= p
.getUniqueNames(names
);
980 for (j
=0; j
<n
; ++j
) {
981 U_ASSERT(nameIndexCount
< nameIndexCapacity
);
982 nameIndex
[nameIndexCount
++] =
983 NameToEnumEntry(names
[j
], p
.enumValue
);
988 * use a stable sort to ensure consistent results between
989 * genpname.cpp and the propname.cpp swapping code
991 UErrorCode errorCode
= U_ZERO_ERROR
;
992 uprv_sortArray(nameIndex
, nameIndexCount
, sizeof(nameIndex
[0]),
993 compareNameToEnumEntry
, NULL
, TRUE
, &errorCode
);
995 printf("Alias names: %d\n", (int)nameIndexCount
);
996 for (i
=0; i
<nameIndexCount
; ++i
) {
998 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
,
999 (int)nameIndex
[i
].enumValue
);
1003 // make sure there are no duplicates. for a sorted list we need
1004 // only compare adjacent items. Alias.getUniqueNames() has
1005 // already eliminated duplicate names for a single property, which
1006 // does occur, so we're checking for duplicate names between two
1007 // properties, which should never occur.
1009 for (i
=1; i
<nameIndexCount
; ++i
) {
1010 if (STRING_TABLE
[nameIndex
[i
-1].nameIndex
] ==
1011 STRING_TABLE
[nameIndex
[i
].nameIndex
]) {
1012 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1013 STRING_TABLE
[nameIndex
[i
-1].nameIndex
].str
,
1014 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
);
1019 die("Two or more duplicate names in property list");
1025 EnumToNameGroupEntry
* genpname::createEnumIndex(const AliasList
& list
) {
1027 // Build the enum => name map
1029 // This is a 1->n map. Each enum maps to 1 or more names. To
1030 // accomplish this the index entry points to an element of the
1031 // NAME_GROUP array. This is the short name (which may be empty).
1032 // From there, subsequent elements of NAME_GROUP are alternate
1033 // names for this enum, up to and including the first one that is
1034 // negative (negate for actual index).
1037 int32_t count
= list
.count();
1039 EnumToNameGroupEntry
* enumIndex
= MALLOC(EnumToNameGroupEntry
, count
);
1040 for (i
=0; i
<count
; ++i
) {
1041 const Alias
& p
= list
[i
];
1042 enumIndex
[i
] = EnumToNameGroupEntry(p
.enumValue
, p
.nameGroupIndex
);
1045 UErrorCode errorCode
= U_ZERO_ERROR
;
1046 uprv_sortArray(enumIndex
, count
, sizeof(enumIndex
[0]),
1047 compareEnumToNameGroupEntry
, NULL
, FALSE
, &errorCode
);
1049 printf("Property enums: %d\n", (int)count
);
1050 for (i
=0; i
<count
; ++i
) {
1051 printf("%d => %d: ",
1052 (int)enumIndex
[i
].enumValue
,
1053 (int)enumIndex
[i
].nameGroupIndex
);
1055 for (j
=enumIndex
[i
].nameGroupIndex
; !done
; ++j
) {
1061 printf("\"%s\"", STRING_TABLE
[k
].str
);
1062 if (!done
) printf(", ");
1071 int genpname::MMain(int argc
, char* argv
[])
1074 UErrorCode status
= U_ZERO_ERROR
;
1077 if (U_FAILURE(status
) && status
!= U_FILE_ACCESS_ERROR
) {
1078 fprintf(stderr
, "Error: u_init returned %s\n", u_errorName(status
));
1079 status
= U_ZERO_ERROR
;
1083 /* preset then read command line options */
1084 options
[3].value
=u_getDataDirectory();
1085 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
1087 /* error handling, printing usage message */
1090 "error in command line argument \"%s\"\n",
1094 debug
= options
[5].doesOccur
? (*options
[5].value
- '0') : 0;
1096 if (argc
!=1 || options
[0].doesOccur
|| options
[1].doesOccur
||
1097 debug
< 0 || debug
> 9) {
1099 "usage: %s [-options]\n"
1100 "\tcreate " PNAME_DATA_NAME
"." PNAME_DATA_TYPE
"\n"
1102 "\t-h or -? or --help this usage text\n"
1103 "\t-v or --verbose turn on verbose output\n"
1104 "\t-c or --copyright include a copyright notice\n"
1105 "\t-d or --destdir destination directory, followed by the path\n"
1106 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1108 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
1111 /* get the options values */
1112 useCopyright
=options
[2].doesOccur
;
1113 verbose
= options
[4].doesOccur
;
1115 // ------------------------------------------------------------
1116 // Do not sort the string table, instead keep it in data.h order.
1117 // This simplifies data swapping and testing thereof because the string
1118 // table itself need not be sorted during swapping.
1119 // The NameToEnum sorter sorts each such map's string offsets instead.
1122 printf("String pool: %d\n", (int)STRING_COUNT
);
1123 for (i
=0; i
<STRING_COUNT
; ++i
) {
1127 printf("%s (%d)", STRING_TABLE
[i
].str
, (int)STRING_TABLE
[i
].index
);
1132 // ------------------------------------------------------------
1133 // Create top-level property indices
1135 PropertyArrayList
props(PROPERTY
, PROPERTY_COUNT
);
1136 int32_t propNameCount
;
1137 NameToEnumEntry
* propName
= createNameIndex(props
, propNameCount
);
1138 EnumToNameGroupEntry
* propEnum
= createEnumIndex(props
);
1140 // ------------------------------------------------------------
1141 // Create indices for the value list for each enumerated property
1143 // This will have more entries than we need...
1144 EnumToValueEntry
* enumToValue
= MALLOC(EnumToValueEntry
, PROPERTY_COUNT
);
1145 int32_t enumToValue_count
= 0;
1146 for (i
=0, j
=0; i
<PROPERTY_COUNT
; ++i
) {
1147 if (PROPERTY
[i
].valueCount
== 0) continue;
1148 AliasArrayList
values(PROPERTY
[i
].valueList
,
1149 PROPERTY
[i
].valueCount
);
1150 enumToValue
[j
].enumValue
= PROPERTY
[i
].enumValue
;
1151 enumToValue
[j
].enumToName
= createEnumIndex(values
);
1152 enumToValue
[j
].enumToName_count
= PROPERTY
[i
].valueCount
;
1153 enumToValue
[j
].nameToEnum
= createNameIndex(values
,
1154 enumToValue
[j
].nameToEnum_count
);
1157 enumToValue_count
= j
;
1159 uprv_sortArray(enumToValue
, enumToValue_count
, sizeof(enumToValue
[0]),
1160 compareEnumToValueEntry
, NULL
, FALSE
, &status
);
1162 // ------------------------------------------------------------
1163 // Build PropertyAliases layout in memory
1165 Builder
builder(debug
);
1167 builder
.buildTopLevelProperties(propName
,
1172 builder
.buildValues(enumToValue
,
1175 builder
.buildStringPool(STRING_TABLE
,
1182 ////////////////////////////////////////////////////////////
1183 // Write the output file
1184 ////////////////////////////////////////////////////////////
1185 int32_t wlen
= writeDataFile(options
[3].value
, builder
);
1187 fprintf(stdout
, "Output file: %s.%s, %ld bytes\n",
1188 U_ICUDATA_NAME
"_" PNAME_DATA_NAME
, PNAME_DATA_TYPE
, (long)wlen
);
1191 return 0; // success
1194 int32_t genpname::writeDataFile(const char *destdir
, const Builder
& builder
) {
1196 int8_t* data
= builder
.createData(length
);
1198 UNewDataMemory
*pdata
;
1199 UErrorCode status
= U_ZERO_ERROR
;
1201 pdata
= udata_create(destdir
, PNAME_DATA_TYPE
, PNAME_DATA_NAME
, &dataInfo
,
1202 useCopyright
? U_COPYRIGHT_STRING
: 0, &status
);
1203 if (U_FAILURE(status
)) {
1204 die("Unable to create data memory");
1207 udata_writeBlock(pdata
, data
, length
);
1209 int32_t dataLength
= (int32_t) udata_finish(pdata
, &status
);
1210 if (U_FAILURE(status
)) {
1211 die("Error writing output file");
1213 if (dataLength
!= length
) {
1214 die("Written file doesn't match expected size");