2 **********************************************************************
3 * Copyright (C) 2002-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
28 // TODO: Clean up and comment this code.
30 //----------------------------------------------------------------------
33 // This is the raw data to be output. We define the data structure,
34 // then include a machine-generated header that contains the actual
37 #include "unicode/uchar.h"
38 #include "unicode/uscript.h"
39 #include "unicode/unorm.h"
46 AliasName(const char* str
, int32_t index
);
48 int compare(const AliasName
& other
) const;
50 UBool
operator==(const AliasName
& other
) const {
51 return compare(other
) == 0;
54 UBool
operator!=(const AliasName
& other
) const {
55 return compare(other
) != 0;
59 AliasName::AliasName(const char* _str
,
66 int AliasName::compare(const AliasName
& other
) const {
67 return uprv_comparePropertyNames(str
, other
.str
);
73 int32_t nameGroupIndex
;
75 Alias(int32_t enumValue
,
76 int32_t nameGroupIndex
);
78 int32_t getUniqueNames(int32_t* nameGroupIndices
) const;
81 Alias::Alias(int32_t anEnumValue
,
82 int32_t aNameGroupIndex
) :
83 enumValue(anEnumValue
),
84 nameGroupIndex(aNameGroupIndex
)
88 class Property
: public Alias
{
91 const Alias
* valueList
;
93 Property(int32_t enumValue
,
94 int32_t nameGroupIndex
,
96 const Alias
* valueList
);
99 Property::Property(int32_t _enumValue
,
100 int32_t _nameGroupIndex
,
102 const Alias
* _valueList
) :
103 Alias(_enumValue
, _nameGroupIndex
),
104 valueCount(_valueCount
),
105 valueList(_valueList
)
109 // *** Include the data header ***
112 /* Return a list of unique names, not including "", for this alias
113 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
114 * elements, will be filled with indices into STRING_TABLE
115 * @return number of indices, >= 1
117 int32_t Alias::getUniqueNames(int32_t* stringIndices
) const {
119 int32_t i
= nameGroupIndex
;
122 int32_t j
= NAME_GROUP
[i
++];
127 if (j
== 0) continue; // omit "" entries
129 for (int32_t k
=0; k
<count
; ++k
) {
130 if (stringIndices
[k
] == j
) {
134 // also do a string check for things like "age|Age"
135 if (STRING_TABLE
[stringIndices
[k
]] == STRING_TABLE
[j
]) {
136 //printf("Found dupe %s|%s\n",
137 // STRING_TABLE[stringIndices[k]].str,
138 // STRING_TABLE[j].str);
143 if (dupe
) continue; // omit duplicates
144 stringIndices
[count
++] = j
;
150 //----------------------------------------------------------------------
// Allocate storage for an array of `count` elements of `type` by calling
// uprv_malloc() and cast the returned pointer to type*.
// `count` is parenthesized so that an expression argument such as `a + b`
// is multiplied as a whole by sizeof(type), and the entire expansion is
// parenthesized so it behaves as a single expression at the call site.
// The macro itself does not check the returned pointer.
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
155 void die(const char* msg
) {
156 fprintf(stderr
, "Error: %s\n", msg
);
160 //----------------------------------------------------------------------
163 * A list of Alias objects.
167 virtual ~AliasList();
168 virtual const Alias
& operator[](int32_t i
) const = 0;
169 virtual int32_t count() const = 0;
// Out-of-line definition of AliasList's virtual destructor; the body is empty.
172 AliasList::~AliasList() {}
177 class AliasArrayList
: public AliasList
{
181 AliasArrayList(const Alias
* _a
, int32_t _n
) {
185 virtual const Alias
& operator[](int32_t i
) const {
188 virtual int32_t count() const {
196 class PropertyArrayList
: public AliasList
{
200 PropertyArrayList(const Property
* _a
, int32_t _n
) {
204 virtual const Alias
& operator[](int32_t i
) const {
207 virtual int32_t count() const {
212 //----------------------------------------------------------------------
215 * An element in a name index. It maps a name (given by index) into
218 class NameToEnumEntry
{
222 NameToEnumEntry(int32_t a
, int32_t b
) { nameIndex
=a
; enumValue
=b
; }
225 // Sort function for NameToEnumEntry (sort by name)
227 compareNameToEnumEntry(const void * /*context*/, const void* e1
, const void* e2
) {
229 STRING_TABLE
[((NameToEnumEntry
*)e1
)->nameIndex
].
230 compare(STRING_TABLE
[((NameToEnumEntry
*)e2
)->nameIndex
]);
233 //----------------------------------------------------------------------
236 * An element in an enum index. It maps an enum into a name group entry
239 class EnumToNameGroupEntry
{
242 int32_t nameGroupIndex
;
243 EnumToNameGroupEntry(int32_t a
, int32_t b
) { enumValue
=a
; nameGroupIndex
=b
; }
245 // are enumValues contiguous for count entries starting with this one?
246 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
247 UBool
isContiguous(int32_t count
) const {
248 const EnumToNameGroupEntry
* p
= this;
249 for (int32_t i
=1; i
<count
; ++i
) {
250 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
258 // Sort function for EnumToNameGroupEntry (sort by enum value)
260 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1
, const void* e2
) {
261 return ((EnumToNameGroupEntry
*)e1
)->enumValue
- ((EnumToNameGroupEntry
*)e2
)->enumValue
;
264 //----------------------------------------------------------------------
267 * An element in the map from enumerated property enums to value maps.
269 class EnumToValueEntry
{
272 EnumToNameGroupEntry
* enumToName
;
273 int32_t enumToName_count
;
274 NameToEnumEntry
* nameToEnum
;
275 int32_t nameToEnum_count
;
277 // are enumValues contiguous for count entries starting with this one?
278 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
279 UBool
isContiguous(int32_t count
) const {
280 const EnumToValueEntry
* p
= this;
281 for (int32_t i
=1; i
<count
; ++i
) {
282 if (p
[i
].enumValue
!= (this->enumValue
+ i
)) {
290 // Sort function for EnumToValueEntry (sort by enum)
292 compareEnumToValueEntry(const void * /*context*/, const void* e1
, const void* e2
) {
293 return ((EnumToValueEntry
*)e1
)->enumValue
- ((EnumToValueEntry
*)e2
)->enumValue
;
296 //----------------------------------------------------------------------
// True when x lies in the closed range [0, MAX_OFFSET]. Used in assertions
// before an index or size is narrowed to the Offset type.
299 #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
303 PropertyAliases header
;
306 NonContiguousEnumToOffset
* enumToName
;
307 int32_t enumToName_size
;
308 Offset enumToName_offset
;
313 NameToEnum
* nameToEnum
;
314 int32_t nameToEnum_size
;
315 Offset nameToEnum_offset
;
318 NonContiguousEnumToOffset
* enumToValue
;
319 int32_t enumToValue_size
;
320 Offset enumToValue_offset
;
324 int32_t valueMap_size
;
325 int32_t valueMap_count
;
326 Offset valueMap_offset
;
328 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
329 // NULL and one is not. valueEnumToName_size[i] is the size of
330 // the non-NULL one. i=0..valueMap_count-1
332 EnumToOffset
** valueEnumToName
;
334 NonContiguousEnumToOffset
** valueNCEnumToName
;
335 int32_t* valueEnumToName_size
;
336 Offset
* valueEnumToName_offset
;
338 // arrays of valueMap_count pointers, sizes, & offsets
339 NameToEnum
** valueNameToEnum
;
340 int32_t* valueNameToEnum_size
;
341 Offset
* valueNameToEnum_offset
;
344 Offset
* nameGroupPool
;
345 int32_t nameGroupPool_count
;
346 int32_t nameGroupPool_size
;
347 Offset nameGroupPool_offset
;
351 int32_t stringPool_count
;
352 int32_t stringPool_size
;
353 Offset stringPool_offset
;
354 Offset
* stringPool_offsetArray
; // relative to stringPool
356 int32_t total_size
; // size of everything
362 Builder(int32_t debugLevel
);
365 void buildTopLevelProperties(const NameToEnumEntry
* propName
,
366 int32_t propNameCount
,
367 const EnumToNameGroupEntry
* propEnum
,
368 int32_t propEnumCount
);
370 void buildValues(const EnumToValueEntry
* e2v
,
373 void buildStringPool(const AliasName
* propertyNames
,
374 int32_t propertyNameCount
,
375 const int32_t* nameGroupIndices
,
376 int32_t nameGroupIndicesCount
);
380 int8_t* createData(int32_t& length
) const;
384 static EnumToOffset
* buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
387 static NonContiguousEnumToOffset
*
388 buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
392 static NonContiguousEnumToOffset
*
393 buildNCEnumToValue(const EnumToValueEntry
* e2v
,
397 static NameToEnum
* buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
401 Offset
stringIndexToOffset(int32_t index
, UBool allowNeg
=FALSE
) const;
402 void fixupNameToEnum(NameToEnum
* n
);
403 void fixupEnumToNameGroup(EnumToOffset
* e2ng
);
404 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
);
406 void computeOffsets();
407 void fixupStringPoolOffsets();
408 void fixupNameGroupPoolOffsets();
409 void fixupMiscellaneousOffsets();
411 static int32_t align(int32_t a
);
412 static void erase(void* p
, int32_t size
);
415 Builder::Builder(int32_t debugLevel
) {
423 valueNCEnumToName
= 0;
424 valueEnumToName_size
= 0;
425 valueEnumToName_offset
= 0;
427 valueNameToEnum_size
= 0;
428 valueNameToEnum_offset
= 0;
431 stringPool_offsetArray
= 0;
434 Builder::~Builder() {
435 uprv_free(enumToName
);
436 uprv_free(nameToEnum
);
437 uprv_free(enumToValue
);
439 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
440 uprv_free(valueEnumToName
[i
]);
441 uprv_free(valueNCEnumToName
[i
]);
442 uprv_free(valueNameToEnum
[i
]);
444 uprv_free(valueEnumToName
);
445 uprv_free(valueNCEnumToName
);
446 uprv_free(valueEnumToName_size
);
447 uprv_free(valueEnumToName_offset
);
448 uprv_free(valueNameToEnum
);
449 uprv_free(valueNameToEnum_size
);
450 uprv_free(valueNameToEnum_offset
);
451 uprv_free(nameGroupPool
);
452 uprv_free(stringPool
);
453 uprv_free(stringPool_offsetArray
);
456 int32_t Builder::align(int32_t a
) {
458 int32_t k
= a
% sizeof(int32_t);
462 a
+= sizeof(int32_t) - k
;
466 void Builder::erase(void* p
, int32_t size
) {
468 int8_t* q
= (int8_t*) p
;
474 EnumToOffset
* Builder::buildEnumToOffset(const EnumToNameGroupEntry
* e2ng
,
477 U_ASSERT(e2ng
->isContiguous(count
));
478 size
= align(EnumToOffset::getSize(count
));
479 EnumToOffset
* result
= (EnumToOffset
*) uprv_malloc(size
);
481 result
->enumStart
= e2ng
->enumValue
;
482 result
->enumLimit
= e2ng
->enumValue
+ count
;
483 Offset
* p
= result
->getOffsetArray();
484 for (int32_t i
=0; i
<count
; ++i
) {
485 // Store name-group-pool index values for now;
486 // they are fixed up to name-group-pool offsets later.
487 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
488 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
493 NonContiguousEnumToOffset
*
494 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry
* e2ng
,
497 U_ASSERT(!e2ng
->isContiguous(count
));
498 size
= align(NonContiguousEnumToOffset::getSize(count
));
499 NonContiguousEnumToOffset
* nc
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
502 EnumValue
* e
= nc
->getEnumArray();
503 Offset
* p
= nc
->getOffsetArray();
504 for (int32_t i
=0; i
<count
; ++i
) {
505 // Store name-group-pool index values for now;
506 // they are fixed up to name-group-pool offsets later.
507 e
[i
] = e2ng
[i
].enumValue
;
508 U_ASSERT(IS_VALID_OFFSET(e2ng
[i
].nameGroupIndex
));
509 p
[i
] = (Offset
) e2ng
[i
].nameGroupIndex
; // FIXUP later
514 NonContiguousEnumToOffset
*
515 Builder::buildNCEnumToValue(const EnumToValueEntry
* e2v
,
518 U_ASSERT(!e2v
->isContiguous(count
));
519 size
= align(NonContiguousEnumToOffset::getSize(count
));
520 NonContiguousEnumToOffset
* result
= (NonContiguousEnumToOffset
*) uprv_malloc(size
);
522 result
->count
= count
;
523 EnumValue
* e
= result
->getEnumArray();
524 for (int32_t i
=0; i
<count
; ++i
) {
525 e
[i
] = e2v
[i
].enumValue
;
526 // offset must be set later
532 * Given an index into the string pool, return an offset. computeOffsets()
533 * must have been called already. If allowNegative is true, allow negatives
534 * and preserve their sign.
536 Offset
Builder::stringIndexToOffset(int32_t index
, UBool allowNegative
) const {
537 // Index 0 is ""; we turn this into an Offset of zero
538 if (index
== 0) return 0;
541 return -Builder::stringIndexToOffset(-index
);
543 die("Negative string pool index");
546 if (index
>= stringPool_count
) {
547 die("String pool index too large");
549 Offset result
= stringPool_offset
+ stringPool_offsetArray
[index
];
550 U_ASSERT(result
>= 0 && result
< total_size
);
553 return 0; // never executed; make compiler happy
556 NameToEnum
* Builder::buildNameToEnum(const NameToEnumEntry
* nameToEnum
,
559 size
= align(NameToEnum::getSize(count
));
560 NameToEnum
* n2e
= (NameToEnum
*) uprv_malloc(size
);
563 Offset
* p
= n2e
->getNameArray();
564 EnumValue
* e
= n2e
->getEnumArray();
565 for (int32_t i
=0; i
<count
; ++i
) {
566 // Store string-pool index values for now;
567 // they are fixed up to string-pool offsets later.
568 U_ASSERT(IS_VALID_OFFSET(nameToEnum
[i
].nameIndex
));
569 p
[i
] = (Offset
) nameToEnum
[i
].nameIndex
; // FIXUP later
570 e
[i
] = nameToEnum
[i
].enumValue
;
576 void Builder::buildTopLevelProperties(const NameToEnumEntry
* propName
,
577 int32_t propNameCount
,
578 const EnumToNameGroupEntry
* propEnum
,
579 int32_t propEnumCount
) {
580 enumToName
= buildNCEnumToNameGroup(propEnum
,
583 nameToEnum
= buildNameToEnum(propName
,
588 void Builder::buildValues(const EnumToValueEntry
* e2v
,
592 U_ASSERT(!e2v
->isContiguous(count
));
594 valueMap_count
= count
;
596 enumToValue
= buildNCEnumToValue(e2v
, count
,
599 valueMap_size
= align(count
* sizeof(ValueMap
));
600 valueMap
= (ValueMap
*) uprv_malloc(valueMap_size
);
601 erase(valueMap
, valueMap_size
);
603 valueEnumToName
= MALLOC(EnumToOffset
*, count
);
604 valueNCEnumToName
= MALLOC(NonContiguousEnumToOffset
*, count
);
605 valueEnumToName_size
= MALLOC(int32_t, count
);
606 valueEnumToName_offset
= MALLOC(Offset
, count
);
607 valueNameToEnum
= MALLOC(NameToEnum
*, count
);
608 valueNameToEnum_size
= MALLOC(int32_t, count
);
609 valueNameToEnum_offset
= MALLOC(Offset
, count
);
611 for (i
=0; i
<count
; ++i
) {
613 e2v
[i
].enumToName
->isContiguous(e2v
[i
].enumToName_count
);
614 valueEnumToName
[i
] = 0;
615 valueNCEnumToName
[i
] = 0;
617 valueEnumToName
[i
] = buildEnumToOffset(e2v
[i
].enumToName
,
618 e2v
[i
].enumToName_count
,
619 valueEnumToName_size
[i
]);
621 valueNCEnumToName
[i
] = buildNCEnumToNameGroup(e2v
[i
].enumToName
,
622 e2v
[i
].enumToName_count
,
623 valueEnumToName_size
[i
]);
626 buildNameToEnum(e2v
[i
].nameToEnum
,
627 e2v
[i
].nameToEnum_count
,
628 valueNameToEnum_size
[i
]);
632 void Builder::buildStringPool(const AliasName
* propertyNames
,
633 int32_t propertyNameCount
,
634 const int32_t* nameGroupIndices
,
635 int32_t nameGroupIndicesCount
) {
638 nameGroupPool_count
= nameGroupIndicesCount
;
639 nameGroupPool_size
= sizeof(Offset
) * nameGroupPool_count
;
640 nameGroupPool
= MALLOC(Offset
, nameGroupPool_count
);
642 for (i
=0; i
<nameGroupPool_count
; ++i
) {
643 // Some indices are negative.
644 int32_t a
= nameGroupIndices
[i
];
646 U_ASSERT(IS_VALID_OFFSET(a
));
647 nameGroupPool
[i
] = (Offset
) nameGroupIndices
[i
];
650 stringPool_count
= propertyNameCount
;
652 // first string must be "" -- we skip it
653 U_ASSERT(*propertyNames
[0].str
== 0);
654 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
655 stringPool_size
+= (int32_t)(uprv_strlen(propertyNames
[i
].str
) + 1);
657 stringPool
= MALLOC(char, stringPool_size
);
658 stringPool_offsetArray
= MALLOC(Offset
, stringPool_count
);
660 char* p
= stringPool
;
661 stringPool_offsetArray
[0] = -1; // we don't use this entry
662 for (i
=1 /*sic*/; i
<propertyNameCount
; ++i
) {
663 const char* str
= propertyNames
[i
].str
;
664 int32_t len
= (int32_t)uprv_strlen(str
);
668 stringPool_offsetArray
[i
] = soFar
;
669 soFar
+= (Offset
)(len
+1);
671 U_ASSERT(soFar
== stringPool_size
);
672 U_ASSERT(p
== (stringPool
+ stringPool_size
));
675 // Confirm that PropertyAliases is a POD (plain old data; see C++
676 // std). The following union will _fail to compile_ if
677 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
678 // macro to check this, but that's not quite right, so that test is
679 // commented out -- see below.)
683 } PropertyAliasesPODTest
;
685 void Builder::computeOffsets() {
687 Offset off
= sizeof(header
);
690 printf("header \t offset=%4d size=%5d\n", 0, off
);
693 // PropertyAliases must have no v-table and must be
694 // padded (if necessary) to the next 32-bit boundary.
695 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
696 U_ASSERT(sizeof(header
) % sizeof(int32_t) == 0);
698 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
700 #define COMPUTE_OFFSET2(foo,type) \
702 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
704 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
705 U_ASSERT(foo##_offset % sizeof(type) == 0);\
706 off = (Offset) (off + foo##_size);
708 COMPUTE_OFFSET(enumToName
); // 0:
709 COMPUTE_OFFSET(nameToEnum
); // 2:
710 COMPUTE_OFFSET(enumToValue
); // 3:
711 COMPUTE_OFFSET(valueMap
); // 4:
713 for (i
=0; i
<valueMap_count
; ++i
) {
715 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
716 (int)i
, off
, (int)valueEnumToName_size
[i
]);
719 valueEnumToName_offset
[i
] = off
; // 5:
720 U_ASSERT(IS_VALID_OFFSET(off
+ valueEnumToName_size
[i
]));
721 off
= (Offset
) (off
+ valueEnumToName_size
[i
]);
724 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
725 (int)i
, off
, (int)valueNameToEnum_size
[i
]);
728 valueNameToEnum_offset
[i
] = off
; // 6:
729 U_ASSERT(IS_VALID_OFFSET(off
+ valueNameToEnum_size
[i
]));
730 off
= (Offset
) (off
+ valueNameToEnum_size
[i
]);
733 // These last two chunks have weaker alignment needs
734 COMPUTE_OFFSET2(nameGroupPool
,Offset
); // 98:
735 COMPUTE_OFFSET2(stringPool
,char); // 99:
738 if (debug
>0) printf("total size=%5d\n\n", (int)total_size
);
739 U_ASSERT(total_size
<= (MAX_OFFSET
+1));
742 void Builder::fixupNameToEnum(NameToEnum
* n
) {
743 // Fix the string pool offsets in n
744 Offset
* p
= n
->getNameArray();
745 for (int32_t i
=0; i
<n
->count
; ++i
) {
746 p
[i
] = stringIndexToOffset(p
[i
]);
750 void Builder::fixupStringPoolOffsets() {
754 fixupNameToEnum(nameToEnum
);
757 for (i
=0; i
<valueMap_count
; ++i
) {
758 fixupNameToEnum(valueNameToEnum
[i
]);
762 for (i
=0; i
<nameGroupPool_count
; ++i
) {
763 nameGroupPool
[i
] = stringIndexToOffset(nameGroupPool
[i
], TRUE
);
767 void Builder::fixupEnumToNameGroup(EnumToOffset
* e2ng
) {
770 Offset
* p
= e2ng
->getOffsetArray();
771 for (i
=e2ng
->enumStart
, j
=0; i
<e2ng
->enumLimit
; ++i
, ++j
) {
772 p
[j
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[j
];
776 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset
* e2ng
) {
778 /*EnumValue* e = e2ng->getEnumArray();*/
779 Offset
* p
= e2ng
->getOffsetArray();
780 for (i
=0; i
<e2ng
->count
; ++i
) {
781 p
[i
] = nameGroupPool_offset
+ sizeof(Offset
) * p
[i
];
785 void Builder::fixupNameGroupPoolOffsets() {
789 fixupNCEnumToNameGroup(enumToName
);
794 for (i
=0; i
<valueMap_count
; ++i
) {
796 if (valueEnumToName
[i
] != 0) {
797 fixupEnumToNameGroup(valueEnumToName
[i
]);
800 if (valueNCEnumToName
[i
] != 0) {
801 fixupNCEnumToNameGroup(valueNCEnumToName
[i
]);
806 void Builder::fixupMiscellaneousOffsets() {
810 erase(&header
, sizeof(header
));
811 header
.enumToName_offset
= enumToName_offset
;
812 header
.nameToEnum_offset
= nameToEnum_offset
;
813 header
.enumToValue_offset
= enumToValue_offset
;
814 // header meta-info used by Java:
815 U_ASSERT(total_size
> 0 && total_size
< 0x7FFF);
816 header
.total_size
= (int16_t) total_size
;
817 header
.valueMap_offset
= valueMap_offset
;
818 header
.valueMap_count
= (int16_t) valueMap_count
;
819 header
.nameGroupPool_offset
= nameGroupPool_offset
;
820 header
.nameGroupPool_count
= (int16_t) nameGroupPool_count
;
821 header
.stringPool_offset
= stringPool_offset
;
822 header
.stringPool_count
= (int16_t) stringPool_count
- 1; // don't include "" entry
824 U_ASSERT(valueMap_count
<= 0x7FFF);
825 U_ASSERT(nameGroupPool_count
<= 0x7FFF);
826 U_ASSERT(stringPool_count
<= 0x7FFF);
829 Offset
* p
= enumToValue
->getOffsetArray();
830 /*EnumValue* e = enumToValue->getEnumArray();*/
831 U_ASSERT(valueMap_count
== enumToValue
->count
);
832 for (i
=0; i
<valueMap_count
; ++i
) {
833 p
[i
] = (Offset
)(valueMap_offset
+ sizeof(ValueMap
) * i
);
837 for (i
=0; i
<valueMap_count
; ++i
) {
838 ValueMap
& v
= valueMap
[i
];
839 v
.enumToName_offset
= v
.ncEnumToName_offset
= 0;
840 if (valueEnumToName
[i
] != 0) {
841 v
.enumToName_offset
= valueEnumToName_offset
[i
];
843 if (valueNCEnumToName
[i
] != 0) {
844 v
.ncEnumToName_offset
= valueEnumToName_offset
[i
];
846 v
.nameToEnum_offset
= valueNameToEnum_offset
[i
];
850 void Builder::fixup() {
852 fixupStringPoolOffsets();
853 fixupNameGroupPoolOffsets();
854 fixupMiscellaneousOffsets();
857 int8_t* Builder::createData(int32_t& length
) const {
859 int8_t* result
= MALLOC(int8_t, length
);
862 int8_t* limit
= result
+ length
;
864 #define APPEND2(x, size) \
865 U_ASSERT((p+size)<=limit); \
866 uprv_memcpy(p, x, size); \
869 #define APPEND(x) APPEND2(x, x##_size)
871 APPEND2(&header
, sizeof(header
));
877 for (int32_t i
=0; i
<valueMap_count
; ++i
) {
878 U_ASSERT((valueEnumToName
[i
] != 0 && valueNCEnumToName
[i
] == 0) ||
879 (valueEnumToName
[i
] == 0 && valueNCEnumToName
[i
] != 0));
880 if (valueEnumToName
[i
] != 0) {
881 APPEND2(valueEnumToName
[i
], valueEnumToName_size
[i
]);
883 if (valueNCEnumToName
[i
] != 0) {
884 APPEND2(valueNCEnumToName
[i
], valueEnumToName_size
[i
]);
886 APPEND2(valueNameToEnum
[i
], valueNameToEnum_size
[i
]);
889 APPEND(nameGroupPool
);
893 fprintf(stderr
, "p != limit; p = %p, limit = %p", p
, limit
);
900 //----------------------------------------------------------------------
902 /* UDataInfo cf. udata.h */
903 static UDataInfo dataInfo
= {
912 {PNAME_SIG_0
, PNAME_SIG_1
, PNAME_SIG_2
, PNAME_SIG_3
},
913 {PNAME_FORMAT_VERSION
, 0, 0, 0}, /* formatVersion */
914 {VERSION_0
, VERSION_1
, VERSION_2
, VERSION_3
} /* Unicode version */
919 // command-line options
925 int MMain(int argc
, char *argv
[]);
928 NameToEnumEntry
* createNameIndex(const AliasList
& list
,
929 int32_t& nameIndexCount
);
931 EnumToNameGroupEntry
* createEnumIndex(const AliasList
& list
);
933 int32_t writeDataFile(const char *destdir
, const Builder
&);
936 int main(int argc
, char *argv
[]) {
937 UErrorCode status
= U_ZERO_ERROR
;
939 if (U_FAILURE(status
) && status
!= U_FILE_ACCESS_ERROR
) {
940 // Note: u_init() will try to open ICU property data.
941 // failures here are expected when building ICU from scratch.
943 fprintf(stderr
, "genpname: can not initialize ICU. Status = %s\n",
944 u_errorName(status
));
949 U_MAIN_INIT_ARGS(argc
, argv
);
950 int retVal
= app
.MMain(argc
, argv
);
955 static UOption options
[]={
957 UOPTION_HELP_QUESTION_MARK
,
961 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG
),
964 NameToEnumEntry
* genpname::createNameIndex(const AliasList
& list
,
965 int32_t& nameIndexCount
) {
967 // Build name => enum map
969 // This is an n->1 map. There are typically multiple names
970 // mapping to one enum. The name index is sorted in order of the name,
971 // as defined by the uprv_comparePropertyNames() function.
974 int32_t count
= list
.count();
976 // compute upper limit on number of names in the index
977 int32_t nameIndexCapacity
= count
* MAX_NAMES_PER_GROUP
;
978 NameToEnumEntry
* nameIndex
= MALLOC(NameToEnumEntry
, nameIndexCapacity
);
981 int32_t names
[MAX_NAMES_PER_GROUP
];
982 for (i
=0; i
<count
; ++i
) {
983 const Alias
& p
= list
[i
];
984 int32_t n
= p
.getUniqueNames(names
);
985 for (j
=0; j
<n
; ++j
) {
986 U_ASSERT(nameIndexCount
< nameIndexCapacity
);
987 nameIndex
[nameIndexCount
++] =
988 NameToEnumEntry(names
[j
], p
.enumValue
);
993 * use a stable sort to ensure consistent results between
994 * genpname.cpp and the propname.cpp swapping code
996 UErrorCode errorCode
= U_ZERO_ERROR
;
997 uprv_sortArray(nameIndex
, nameIndexCount
, sizeof(nameIndex
[0]),
998 compareNameToEnumEntry
, NULL
, TRUE
, &errorCode
);
1000 printf("Alias names: %d\n", (int)nameIndexCount
);
1001 for (i
=0; i
<nameIndexCount
; ++i
) {
1002 printf("%s => %d\n",
1003 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
,
1004 (int)nameIndex
[i
].enumValue
);
1008 // make sure there are no duplicates. for a sorted list we need
1009 // only compare adjacent items. Alias.getUniqueNames() has
1010 // already eliminated duplicate names for a single property, which
1011 // does occur, so we're checking for duplicate names between two
1012 // properties, which should never occur.
1014 for (i
=1; i
<nameIndexCount
; ++i
) {
1015 if (STRING_TABLE
[nameIndex
[i
-1].nameIndex
] ==
1016 STRING_TABLE
[nameIndex
[i
].nameIndex
]) {
1017 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1018 STRING_TABLE
[nameIndex
[i
-1].nameIndex
].str
,
1019 STRING_TABLE
[nameIndex
[i
].nameIndex
].str
);
1024 die("Two or more duplicate names in property list");
1030 EnumToNameGroupEntry
* genpname::createEnumIndex(const AliasList
& list
) {
1032 // Build the enum => name map
1034 // This is a 1->n map. Each enum maps to 1 or more names. To
1035 // accomplish this the index entry points to an element of the
1036 // NAME_GROUP array. This is the short name (which may be empty).
1037 // From there, subsequent elements of NAME_GROUP are alternate
1038 // names for this enum, up to and including the first one that is
1039 // negative (negate for actual index).
1042 int32_t count
= list
.count();
1044 EnumToNameGroupEntry
* enumIndex
= MALLOC(EnumToNameGroupEntry
, count
);
1045 for (i
=0; i
<count
; ++i
) {
1046 const Alias
& p
= list
[i
];
1047 enumIndex
[i
] = EnumToNameGroupEntry(p
.enumValue
, p
.nameGroupIndex
);
1050 UErrorCode errorCode
= U_ZERO_ERROR
;
1051 uprv_sortArray(enumIndex
, count
, sizeof(enumIndex
[0]),
1052 compareEnumToNameGroupEntry
, NULL
, FALSE
, &errorCode
);
1054 printf("Property enums: %d\n", (int)count
);
1055 for (i
=0; i
<count
; ++i
) {
1056 printf("%d => %d: ",
1057 (int)enumIndex
[i
].enumValue
,
1058 (int)enumIndex
[i
].nameGroupIndex
);
1060 for (j
=enumIndex
[i
].nameGroupIndex
; !done
; ++j
) {
1066 printf("\"%s\"", STRING_TABLE
[k
].str
);
1067 if (!done
) printf(", ");
1076 int genpname::MMain(int argc
, char* argv
[])
1079 UErrorCode status
= U_ZERO_ERROR
;
1082 if (U_FAILURE(status
) && status
!= U_FILE_ACCESS_ERROR
) {
1083 fprintf(stderr
, "Error: u_init returned %s\n", u_errorName(status
));
1084 status
= U_ZERO_ERROR
;
1088 /* preset then read command line options */
1089 options
[3].value
=u_getDataDirectory();
1090 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
1092 /* error handling, printing usage message */
1095 "error in command line argument \"%s\"\n",
1099 debug
= options
[5].doesOccur
? (*options
[5].value
- '0') : 0;
1101 if (argc
!=1 || options
[0].doesOccur
|| options
[1].doesOccur
||
1102 debug
< 0 || debug
> 9) {
1104 "usage: %s [-options]\n"
1105 "\tcreate " PNAME_DATA_NAME
"." PNAME_DATA_TYPE
"\n"
1107 "\t-h or -? or --help this usage text\n"
1108 "\t-v or --verbose turn on verbose output\n"
1109 "\t-c or --copyright include a copyright notice\n"
1110 "\t-d or --destdir destination directory, followed by the path\n"
1111 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1113 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
1116 /* get the options values */
1117 useCopyright
=options
[2].doesOccur
;
1118 verbose
= options
[4].doesOccur
;
1120 // ------------------------------------------------------------
1121 // Do not sort the string table, instead keep it in data.h order.
1122 // This simplifies data swapping and testing thereof because the string
1123 // table itself need not be sorted during swapping.
1124 // The NameToEnum sorter sorts each such map's string offsets instead.
1127 printf("String pool: %d\n", (int)STRING_COUNT
);
1128 for (i
=0; i
<STRING_COUNT
; ++i
) {
1132 printf("%s (%d)", STRING_TABLE
[i
].str
, (int)STRING_TABLE
[i
].index
);
1137 // ------------------------------------------------------------
1138 // Create top-level property indices
1140 PropertyArrayList
props(PROPERTY
, PROPERTY_COUNT
);
1141 int32_t propNameCount
;
1142 NameToEnumEntry
* propName
= createNameIndex(props
, propNameCount
);
1143 EnumToNameGroupEntry
* propEnum
= createEnumIndex(props
);
1145 // ------------------------------------------------------------
1146 // Create indices for the value list for each enumerated property
1148 // This will have more entries than we need...
1149 EnumToValueEntry
* enumToValue
= MALLOC(EnumToValueEntry
, PROPERTY_COUNT
);
1150 int32_t enumToValue_count
= 0;
1151 for (i
=0, j
=0; i
<PROPERTY_COUNT
; ++i
) {
1152 if (PROPERTY
[i
].valueCount
== 0) continue;
1153 AliasArrayList
values(PROPERTY
[i
].valueList
,
1154 PROPERTY
[i
].valueCount
);
1155 enumToValue
[j
].enumValue
= PROPERTY
[i
].enumValue
;
1156 enumToValue
[j
].enumToName
= createEnumIndex(values
);
1157 enumToValue
[j
].enumToName_count
= PROPERTY
[i
].valueCount
;
1158 enumToValue
[j
].nameToEnum
= createNameIndex(values
,
1159 enumToValue
[j
].nameToEnum_count
);
1162 enumToValue_count
= j
;
1164 uprv_sortArray(enumToValue
, enumToValue_count
, sizeof(enumToValue
[0]),
1165 compareEnumToValueEntry
, NULL
, FALSE
, &status
);
1167 // ------------------------------------------------------------
1168 // Build PropertyAliases layout in memory
1170 Builder
builder(debug
);
1172 builder
.buildTopLevelProperties(propName
,
1177 builder
.buildValues(enumToValue
,
1180 builder
.buildStringPool(STRING_TABLE
,
1187 ////////////////////////////////////////////////////////////
1188 // Write the output file
1189 ////////////////////////////////////////////////////////////
1190 int32_t wlen
= writeDataFile(options
[3].value
, builder
);
1192 fprintf(stdout
, "Output file: %s.%s, %ld bytes\n",
1193 U_ICUDATA_NAME
"_" PNAME_DATA_NAME
, PNAME_DATA_TYPE
, (long)wlen
);
1196 return 0; // success
1199 int32_t genpname::writeDataFile(const char *destdir
, const Builder
& builder
) {
1201 int8_t* data
= builder
.createData(length
);
1203 UNewDataMemory
*pdata
;
1204 UErrorCode status
= U_ZERO_ERROR
;
1206 pdata
= udata_create(destdir
, PNAME_DATA_TYPE
, PNAME_DATA_NAME
, &dataInfo
,
1207 useCopyright
? U_COPYRIGHT_STRING
: 0, &status
);
1208 if (U_FAILURE(status
)) {
1209 die("Unable to create data memory");
1212 udata_writeBlock(pdata
, data
, length
);
1214 int32_t dataLength
= (int32_t) udata_finish(pdata
, &status
);
1215 if (U_FAILURE(status
)) {
1216 die("Error writing output file");
1218 if (dataLength
!= length
) {
1219 die("Written file doesn't match expected size");