]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genpname/genpname.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / tools / genpname / genpname.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2002-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "filestrm.h"
17 #include "uarrsort.h"
18 #include "unewdata.h"
19 #include "uoptions.h"
20 #include "uprops.h"
21 #include "propname.h"
22 #include "uassert.h"
23
24 #include <stdio.h>
25
26 // TODO: Clean up and comment this code.
27
28 //----------------------------------------------------------------------
29 // BEGIN DATA
30 //
31 // This is the raw data to be output. We define the data structure,
32 // then include a machine-generated header that contains the actual
33 // data.
34
35 #include "unicode/uchar.h"
36 #include "unicode/uscript.h"
37 #include "unicode/unorm.h"
38
39 class AliasName {
40 public:
41 const char* str;
42 int32_t index;
43
44 AliasName(const char* str, int32_t index);
45
46 int compare(const AliasName& other) const;
47
48 UBool operator==(const AliasName& other) const {
49 return compare(other) == 0;
50 }
51
52 UBool operator!=(const AliasName& other) const {
53 return compare(other) != 0;
54 }
55 };
56
57 AliasName::AliasName(const char* _str,
58 int32_t _index) :
59 str(_str),
60 index(_index)
61 {
62 }
63
64 int AliasName::compare(const AliasName& other) const {
65 return uprv_comparePropertyNames(str, other.str);
66 }
67
/**
 * An enum value together with the index of its name group, i.e. the
 * position in NAME_GROUP at which the names for this enum start
 * (see getUniqueNames()).
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    // Fills nameGroupIndices with STRING_TABLE indices of this alias's
    // distinct non-empty names and returns how many were stored.
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};

Alias::Alias(int32_t theEnumValue, int32_t theNameGroupIndex)
    : enumValue(theEnumValue),
      nameGroupIndex(theNameGroupIndex)
{
}
85
86 class Property : public Alias {
87 public:
88 int32_t valueCount;
89 const Alias* valueList;
90
91 Property(int32_t enumValue,
92 int32_t nameGroupIndex,
93 int32_t valueCount,
94 const Alias* valueList);
95 };
96
97 Property::Property(int32_t _enumValue,
98 int32_t _nameGroupIndex,
99 int32_t _valueCount,
100 const Alias* _valueList) :
101 Alias(_enumValue, _nameGroupIndex),
102 valueCount(_valueCount),
103 valueList(_valueList)
104 {
105 }
106
107 // *** Include the data header ***
108 #include "data.h"
109
110 /* return a list of unique names, not including "", for this property
111 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
112 * elements, will be filled with indices into STRING_TABLE
113 * @return number of indices, >= 1
114 */
115 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
116 int32_t count = 0;
117 int32_t i = nameGroupIndex;
118 UBool done = FALSE;
119 while (!done) {
120 int32_t j = NAME_GROUP[i++];
121 if (j < 0) {
122 done = TRUE;
123 j = -j;
124 }
125 if (j == 0) continue; // omit "" entries
126 UBool dupe = FALSE;
127 for (int32_t k=0; k<count; ++k) {
128 if (stringIndices[k] == j) {
129 dupe = TRUE;
130 break;
131 }
132 // also do a string check for things like "age|Age"
133 if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
134 //printf("Found dupe %s|%s\n",
135 // STRING_TABLE[stringIndices[k]].str,
136 // STRING_TABLE[j].str);
137 dupe = TRUE;
138 break;
139 }
140 }
141 if (dupe) continue; // omit duplicates
142 stringIndices[count++] = j;
143 }
144 return count;
145 }
146
147 // END DATA
148 //----------------------------------------------------------------------
149
// Allocate room for `count` objects of `type` with uprv_malloc() and
// cast the result to type*.  The memory is not initialized.  `count`
// and the whole expansion are parenthesized so that expression
// arguments (e.g. MALLOC(T, a+b)) and surrounding operators bind as
// expected.
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
152
// Report a fatal error on stderr as "Error: <msg>" and terminate the
// process with exit status 1.  Does not return.
void die(const char* msg) {
    static const char kErrorFormat[] = "Error: %s\n";
    fprintf(stderr, kErrorFormat, msg);
    exit(1);
}
157
158 //----------------------------------------------------------------------
159
160 /**
161 * A list of Alias objects.
162 */
163 class AliasList {
164 public:
165 virtual ~AliasList();
166 virtual const Alias& operator[](int32_t i) const = 0;
167 virtual int32_t count() const = 0;
168 };
169
170 AliasList::~AliasList() {}
171
172 /**
173 * A single array.
174 */
175 class AliasArrayList : public AliasList {
176 const Alias* a;
177 int32_t n;
178 public:
179 AliasArrayList(const Alias* _a, int32_t _n) {
180 a = _a;
181 n = _n;
182 }
183 virtual const Alias& operator[](int32_t i) const {
184 return a[i];
185 }
186 virtual int32_t count() const {
187 return n;
188 }
189 };
190
191 /**
192 * A single array.
193 */
194 class PropertyArrayList : public AliasList {
195 const Property* a;
196 int32_t n;
197 public:
198 PropertyArrayList(const Property* _a, int32_t _n) {
199 a = _a;
200 n = _n;
201 }
202 virtual const Alias& operator[](int32_t i) const {
203 return a[i];
204 }
205 virtual int32_t count() const {
206 return n;
207 }
208 };
209
210 //----------------------------------------------------------------------
211
/**
 * One row of a name index: associates a name, identified by its index
 * into STRING_TABLE, with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    // a = name index, b = enum value
    NameToEnumEntry(int32_t a, int32_t b)
        : nameIndex(a), enumValue(b)
    {
    }
};
222
223 // Sort function for NameToEnumEntry (sort by name)
224 U_CFUNC int32_t
225 compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
226 return
227 STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
228 compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
229 }
230
231 //----------------------------------------------------------------------
232
233 /**
234 * An element in an enum index. It maps an enum into a name group entry
235 * (given by index).
236 */
237 class EnumToNameGroupEntry {
238 public:
239 int32_t enumValue;
240 int32_t nameGroupIndex;
241 EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
242
243 // are enumValues contiguous for count entries starting with this one?
244 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
245 UBool isContiguous(int32_t count) const {
246 const EnumToNameGroupEntry* p = this;
247 for (int32_t i=1; i<count; ++i) {
248 if (p[i].enumValue != (this->enumValue + i)) {
249 return FALSE;
250 }
251 }
252 return TRUE;
253 }
254 };
255
256 // Sort function for EnumToNameGroupEntry (sort by name index)
257 U_CFUNC int32_t
258 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
259 return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
260 }
261
262 //----------------------------------------------------------------------
263
264 /**
265 * An element in the map from enumerated property enums to value maps.
266 */
267 class EnumToValueEntry {
268 public:
269 int32_t enumValue;
270 EnumToNameGroupEntry* enumToName;
271 int32_t enumToName_count;
272 NameToEnumEntry* nameToEnum;
273 int32_t nameToEnum_count;
274
275 // are enumValues contiguous for count entries starting with this one?
276 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
277 UBool isContiguous(int32_t count) const {
278 const EnumToValueEntry* p = this;
279 for (int32_t i=1; i<count; ++i) {
280 if (p[i].enumValue != (this->enumValue + i)) {
281 return FALSE;
282 }
283 }
284 return TRUE;
285 }
286 };
287
288 // Sort function for EnumToValueEntry (sort by enum)
289 U_CFUNC int32_t
290 compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
291 return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
292 }
293
294 //----------------------------------------------------------------------
295 // BEGIN Builder
296
// True when x lies in the closed range [0, MAX_OFFSET].
// Note: x is evaluated twice.
#define IS_VALID_OFFSET(x) ((0<=(x))&&((x)<=MAX_OFFSET))
298
299 class Builder {
300 // header:
301 PropertyAliases header;
302
303 // 0:
304 NonContiguousEnumToOffset* enumToName;
305 int32_t enumToName_size;
306 Offset enumToName_offset;
307
308 // 1: (deleted)
309
310 // 2:
311 NameToEnum* nameToEnum;
312 int32_t nameToEnum_size;
313 Offset nameToEnum_offset;
314
315 // 3:
316 NonContiguousEnumToOffset* enumToValue;
317 int32_t enumToValue_size;
318 Offset enumToValue_offset;
319
320 // 4:
321 ValueMap* valueMap;
322 int32_t valueMap_size;
323 int32_t valueMap_count;
324 Offset valueMap_offset;
325
326 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
327 // NULL and one is not. valueEnumToName_size[i] is the size of
328 // the non-NULL one. i=0..valueMapCount-1
329 // 5a:
330 EnumToOffset** valueEnumToName;
331 // 5b:
332 NonContiguousEnumToOffset** valueNCEnumToName;
333 int32_t* valueEnumToName_size;
334 Offset* valueEnumToName_offset;
335 // 6:
336 // arrays of valueMap_count pointers, sizes, & offsets
337 NameToEnum** valueNameToEnum;
338 int32_t* valueNameToEnum_size;
339 Offset* valueNameToEnum_offset;
340
341 // 98:
342 Offset* nameGroupPool;
343 int32_t nameGroupPool_count;
344 int32_t nameGroupPool_size;
345 Offset nameGroupPool_offset;
346
347 // 99:
348 char* stringPool;
349 int32_t stringPool_count;
350 int32_t stringPool_size;
351 Offset stringPool_offset;
352 Offset* stringPool_offsetArray; // relative to stringPool
353
354 int32_t total_size; // size of everything
355
356 int32_t debug;
357
358 public:
359
360 Builder(int32_t debugLevel);
361 ~Builder();
362
363 void buildTopLevelProperties(const NameToEnumEntry* propName,
364 int32_t propNameCount,
365 const EnumToNameGroupEntry* propEnum,
366 int32_t propEnumCount);
367
368 void buildValues(const EnumToValueEntry* e2v,
369 int32_t count);
370
371 void buildStringPool(const AliasName* propertyNames,
372 int32_t propertyNameCount,
373 const int32_t* nameGroupIndices,
374 int32_t nameGroupIndicesCount);
375
376 void fixup();
377
378 int8_t* createData(int32_t& length) const;
379
380 private:
381
382 static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
383 int32_t count,
384 int32_t& size);
385 static NonContiguousEnumToOffset*
386 buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
387 int32_t count,
388 int32_t& size);
389
390 static NonContiguousEnumToOffset*
391 buildNCEnumToValue(const EnumToValueEntry* e2v,
392 int32_t count,
393 int32_t& size);
394
395 static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
396 int32_t count,
397 int32_t& size);
398
399 Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
400 void fixupNameToEnum(NameToEnum* n);
401 void fixupEnumToNameGroup(EnumToOffset* e2ng);
402 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
403
404 void computeOffsets();
405 void fixupStringPoolOffsets();
406 void fixupNameGroupPoolOffsets();
407 void fixupMiscellaneousOffsets();
408
409 static int32_t align(int32_t a);
410 static void erase(void* p, int32_t size);
411 };
412
413 Builder::Builder(int32_t debugLevel) {
414 debug = debugLevel;
415 enumToName = 0;
416 nameToEnum = 0;
417 enumToValue = 0;
418 valueMap_count = 0;
419 valueMap = 0;
420 valueEnumToName = 0;
421 valueNCEnumToName = 0;
422 valueEnumToName_size = 0;
423 valueEnumToName_offset = 0;
424 valueNameToEnum = 0;
425 valueNameToEnum_size = 0;
426 valueNameToEnum_offset = 0;
427 nameGroupPool = 0;
428 stringPool = 0;
429 stringPool_offsetArray = 0;
430 }
431
432 Builder::~Builder() {
433 uprv_free(enumToName);
434 uprv_free(nameToEnum);
435 uprv_free(enumToValue);
436 uprv_free(valueMap);
437 for (int32_t i=0; i<valueMap_count; ++i) {
438 uprv_free(valueEnumToName[i]);
439 uprv_free(valueNCEnumToName[i]);
440 uprv_free(valueNameToEnum[i]);
441 }
442 uprv_free(valueEnumToName);
443 uprv_free(valueNCEnumToName);
444 uprv_free(valueEnumToName_size);
445 uprv_free(valueEnumToName_offset);
446 uprv_free(valueNameToEnum);
447 uprv_free(valueNameToEnum_size);
448 uprv_free(valueNameToEnum_offset);
449 uprv_free(nameGroupPool);
450 uprv_free(stringPool);
451 uprv_free(stringPool_offsetArray);
452 }
453
454 int32_t Builder::align(int32_t a) {
455 U_ASSERT(a >= 0);
456 int32_t k = a % sizeof(int32_t);
457 if (k == 0) {
458 return a;
459 }
460 a += sizeof(int32_t) - k;
461 return a;
462 }
463
464 void Builder::erase(void* p, int32_t size) {
465 U_ASSERT(size >= 0);
466 int8_t* q = (int8_t*) p;
467 while (size--) {
468 *q++ = 0;
469 }
470 }
471
472 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
473 int32_t count,
474 int32_t& size) {
475 U_ASSERT(e2ng->isContiguous(count));
476 size = align(EnumToOffset::getSize(count));
477 EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
478 erase(result, size);
479 result->enumStart = e2ng->enumValue;
480 result->enumLimit = e2ng->enumValue + count;
481 Offset* p = result->getOffsetArray();
482 for (int32_t i=0; i<count; ++i) {
483 // set these to NGI index values
484 // fix them up to NGI offset values
485 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
486 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
487 }
488 return result;
489 }
490
491 NonContiguousEnumToOffset*
492 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
493 int32_t count,
494 int32_t& size) {
495 U_ASSERT(!e2ng->isContiguous(count));
496 size = align(NonContiguousEnumToOffset::getSize(count));
497 NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
498 erase(nc, size);
499 nc->count = count;
500 EnumValue* e = nc->getEnumArray();
501 Offset* p = nc->getOffsetArray();
502 for (int32_t i=0; i<count; ++i) {
503 // set these to NGI index values
504 // fix them up to NGI offset values
505 e[i] = e2ng[i].enumValue;
506 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
507 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
508 }
509 return nc;
510 }
511
512 NonContiguousEnumToOffset*
513 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
514 int32_t count,
515 int32_t& size) {
516 U_ASSERT(!e2v->isContiguous(count));
517 size = align(NonContiguousEnumToOffset::getSize(count));
518 NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
519 erase(result, size);
520 result->count = count;
521 EnumValue* e = result->getEnumArray();
522 for (int32_t i=0; i<count; ++i) {
523 e[i] = e2v[i].enumValue;
524 // offset must be set later
525 }
526 return result;
527 }
528
529 /**
530 * Given an index into the string pool, return an offset. computeOffsets()
531 * must have been called already. If allowNegative is true, allow negatives
532 * and preserve their sign.
533 */
534 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
535 // Index 0 is ""; we turn this into an Offset of zero
536 if (index == 0) return 0;
537 if (index < 0) {
538 if (allowNegative) {
539 return -Builder::stringIndexToOffset(-index);
540 } else {
541 die("Negative string pool index");
542 }
543 } else {
544 if (index >= stringPool_count) {
545 die("String pool index too large");
546 }
547 Offset result = stringPool_offset + stringPool_offsetArray[index];
548 U_ASSERT(result >= 0 && result < total_size);
549 return result;
550 }
551 return 0; // never executed; make compiler happy
552 }
553
554 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
555 int32_t count,
556 int32_t& size) {
557 size = align(NameToEnum::getSize(count));
558 NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
559 erase(n2e, size);
560 n2e->count = count;
561 Offset* p = n2e->getNameArray();
562 EnumValue* e = n2e->getEnumArray();
563 for (int32_t i=0; i<count; ++i) {
564 // set these to SP index values
565 // fix them up to SP offset values
566 U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
567 p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
568 e[i] = nameToEnum[i].enumValue;
569 }
570 return n2e;
571 }
572
573
574 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
575 int32_t propNameCount,
576 const EnumToNameGroupEntry* propEnum,
577 int32_t propEnumCount) {
578 enumToName = buildNCEnumToNameGroup(propEnum,
579 propEnumCount,
580 enumToName_size);
581 nameToEnum = buildNameToEnum(propName,
582 propNameCount,
583 nameToEnum_size);
584 }
585
586 void Builder::buildValues(const EnumToValueEntry* e2v,
587 int32_t count) {
588 int32_t i;
589
590 U_ASSERT(!e2v->isContiguous(count));
591
592 valueMap_count = count;
593
594 enumToValue = buildNCEnumToValue(e2v, count,
595 enumToValue_size);
596
597 valueMap_size = align(count * sizeof(ValueMap));
598 valueMap = (ValueMap*) uprv_malloc(valueMap_size);
599 erase(valueMap, valueMap_size);
600
601 valueEnumToName = MALLOC(EnumToOffset*, count);
602 valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
603 valueEnumToName_size = MALLOC(int32_t, count);
604 valueEnumToName_offset = MALLOC(Offset, count);
605 valueNameToEnum = MALLOC(NameToEnum*, count);
606 valueNameToEnum_size = MALLOC(int32_t, count);
607 valueNameToEnum_offset = MALLOC(Offset, count);
608
609 for (i=0; i<count; ++i) {
610 UBool isContiguous =
611 e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
612 valueEnumToName[i] = 0;
613 valueNCEnumToName[i] = 0;
614 if (isContiguous) {
615 valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
616 e2v[i].enumToName_count,
617 valueEnumToName_size[i]);
618 } else {
619 valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
620 e2v[i].enumToName_count,
621 valueEnumToName_size[i]);
622 }
623 valueNameToEnum[i] =
624 buildNameToEnum(e2v[i].nameToEnum,
625 e2v[i].nameToEnum_count,
626 valueNameToEnum_size[i]);
627 }
628 }
629
630 void Builder::buildStringPool(const AliasName* propertyNames,
631 int32_t propertyNameCount,
632 const int32_t* nameGroupIndices,
633 int32_t nameGroupIndicesCount) {
634 int32_t i;
635
636 nameGroupPool_count = nameGroupIndicesCount;
637 nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
638 nameGroupPool = MALLOC(Offset, nameGroupPool_count);
639
640 for (i=0; i<nameGroupPool_count; ++i) {
641 // Some indices are negative.
642 int32_t a = nameGroupIndices[i];
643 if (a < 0) a = -a;
644 U_ASSERT(IS_VALID_OFFSET(a));
645 nameGroupPool[i] = (Offset) nameGroupIndices[i];
646 }
647
648 stringPool_count = propertyNameCount;
649 stringPool_size = 0;
650 // first string must be "" -- we skip it
651 U_ASSERT(*propertyNames[0].str == 0);
652 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
653 stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
654 }
655 stringPool = MALLOC(char, stringPool_size);
656 stringPool_offsetArray = MALLOC(Offset, stringPool_count);
657 Offset soFar = 0;
658 char* p = stringPool;
659 stringPool_offsetArray[0] = -1; // we don't use this entry
660 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
661 const char* str = propertyNames[i].str;
662 int32_t len = (int32_t)uprv_strlen(str);
663 uprv_strcpy(p, str);
664 p += len;
665 *p++ = 0;
666 stringPool_offsetArray[i] = soFar;
667 soFar += (Offset)(len+1);
668 }
669 U_ASSERT(soFar == stringPool_size);
670 U_ASSERT(p == (stringPool + stringPool_size));
671 }
672
673 // Confirm that PropertyAliases is a POD (plain old data; see C++
674 // std). The following union will _fail to compile_ if
675 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
676 // macro to check this, but that's not quite right, so that test is
677 // commented out -- see below.)
678 typedef union {
679 int32_t i;
680 PropertyAliases p;
681 } PropertyAliasesPODTest;
682
683 void Builder::computeOffsets() {
684 int32_t i;
685 Offset off = sizeof(header);
686
687 if (debug>0) {
688 printf("header \t offset=%4d size=%5d\n", 0, off);
689 }
690
691 // PropertyAliases must have no v-table and must be
692 // padded (if necessary) to the next 32-bit boundary.
693 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
694 U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
695
696 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
697
698 #define COMPUTE_OFFSET2(foo,type) \
699 if (debug>0)\
700 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
701 foo##_offset = off;\
702 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
703 U_ASSERT(foo##_offset % sizeof(type) == 0);\
704 off = (Offset) (off + foo##_size);
705
706 COMPUTE_OFFSET(enumToName); // 0:
707 COMPUTE_OFFSET(nameToEnum); // 2:
708 COMPUTE_OFFSET(enumToValue); // 3:
709 COMPUTE_OFFSET(valueMap); // 4:
710
711 for (i=0; i<valueMap_count; ++i) {
712 if (debug>0) {
713 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
714 (int)i, off, (int)valueEnumToName_size[i]);
715 }
716
717 valueEnumToName_offset[i] = off; // 5:
718 U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
719 off = (Offset) (off + valueEnumToName_size[i]);
720
721 if (debug>0) {
722 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
723 (int)i, off, (int)valueNameToEnum_size[i]);
724 }
725
726 valueNameToEnum_offset[i] = off; // 6:
727 U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
728 off = (Offset) (off + valueNameToEnum_size[i]);
729 }
730
731 // These last two chunks have weaker alignment needs
732 COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
733 COMPUTE_OFFSET2(stringPool,char); // 99:
734
735 total_size = off;
736 if (debug>0) printf("total size=%5d\n\n", (int)total_size);
737 U_ASSERT(total_size <= (MAX_OFFSET+1));
738 }
739
740 void Builder::fixupNameToEnum(NameToEnum* n) {
741 // Fix the string pool offsets in n
742 Offset* p = n->getNameArray();
743 for (int32_t i=0; i<n->count; ++i) {
744 p[i] = stringIndexToOffset(p[i]);
745 }
746 }
747
748 void Builder::fixupStringPoolOffsets() {
749 int32_t i;
750
751 // 2:
752 fixupNameToEnum(nameToEnum);
753
754 // 6:
755 for (i=0; i<valueMap_count; ++i) {
756 fixupNameToEnum(valueNameToEnum[i]);
757 }
758
759 // 98:
760 for (i=0; i<nameGroupPool_count; ++i) {
761 nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
762 }
763 }
764
765 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
766 EnumValue i;
767 int32_t j;
768 Offset* p = e2ng->getOffsetArray();
769 for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
770 p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
771 }
772 }
773
774 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
775 int32_t i;
776 /*EnumValue* e = e2ng->getEnumArray();*/
777 Offset* p = e2ng->getOffsetArray();
778 for (i=0; i<e2ng->count; ++i) {
779 p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
780 }
781 }
782
783 void Builder::fixupNameGroupPoolOffsets() {
784 int32_t i;
785
786 // 0:
787 fixupNCEnumToNameGroup(enumToName);
788
789 // 1: (deleted)
790
791 // 5:
792 for (i=0; i<valueMap_count; ++i) {
793 // 5a:
794 if (valueEnumToName[i] != 0) {
795 fixupEnumToNameGroup(valueEnumToName[i]);
796 }
797 // 5b:
798 if (valueNCEnumToName[i] != 0) {
799 fixupNCEnumToNameGroup(valueNCEnumToName[i]);
800 }
801 }
802 }
803
804 void Builder::fixupMiscellaneousOffsets() {
805 int32_t i;
806
807 // header:
808 erase(&header, sizeof(header));
809 header.enumToName_offset = enumToName_offset;
810 header.nameToEnum_offset = nameToEnum_offset;
811 header.enumToValue_offset = enumToValue_offset;
812 // header meta-info used by Java:
813 U_ASSERT(total_size > 0 && total_size < 0x7FFF);
814 header.total_size = (int16_t) total_size;
815 header.valueMap_offset = valueMap_offset;
816 header.valueMap_count = (int16_t) valueMap_count;
817 header.nameGroupPool_offset = nameGroupPool_offset;
818 header.nameGroupPool_count = (int16_t) nameGroupPool_count;
819 header.stringPool_offset = stringPool_offset;
820 header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
821
822 U_ASSERT(valueMap_count <= 0x7FFF);
823 U_ASSERT(nameGroupPool_count <= 0x7FFF);
824 U_ASSERT(stringPool_count <= 0x7FFF);
825
826 // 3:
827 Offset* p = enumToValue->getOffsetArray();
828 /*EnumValue* e = enumToValue->getEnumArray();*/
829 U_ASSERT(valueMap_count == enumToValue->count);
830 for (i=0; i<valueMap_count; ++i) {
831 p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
832 }
833
834 // 4:
835 for (i=0; i<valueMap_count; ++i) {
836 ValueMap& v = valueMap[i];
837 v.enumToName_offset = v.ncEnumToName_offset = 0;
838 if (valueEnumToName[i] != 0) {
839 v.enumToName_offset = valueEnumToName_offset[i];
840 }
841 if (valueNCEnumToName[i] != 0) {
842 v.ncEnumToName_offset = valueEnumToName_offset[i];
843 }
844 v.nameToEnum_offset = valueNameToEnum_offset[i];
845 }
846 }
847
848 void Builder::fixup() {
849 computeOffsets();
850 fixupStringPoolOffsets();
851 fixupNameGroupPoolOffsets();
852 fixupMiscellaneousOffsets();
853 }
854
855 int8_t* Builder::createData(int32_t& length) const {
856 length = total_size;
857 int8_t* result = MALLOC(int8_t, length);
858
859 int8_t* p = result;
860 int8_t* limit = result + length;
861
862 #define APPEND2(x, size) \
863 U_ASSERT((p+size)<=limit); \
864 uprv_memcpy(p, x, size); \
865 p += size
866
867 #define APPEND(x) APPEND2(x, x##_size)
868
869 APPEND2(&header, sizeof(header));
870 APPEND(enumToName);
871 APPEND(nameToEnum);
872 APPEND(enumToValue);
873 APPEND(valueMap);
874
875 for (int32_t i=0; i<valueMap_count; ++i) {
876 U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
877 (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
878 if (valueEnumToName[i] != 0) {
879 APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
880 }
881 if (valueNCEnumToName[i] != 0) {
882 APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
883 }
884 APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
885 }
886
887 APPEND(nameGroupPool);
888 APPEND(stringPool);
889
890 if (p != limit) {
891 fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
892 exit(1);
893 }
894 return result;
895 }
896
897 // END Builder
898 //----------------------------------------------------------------------
899
900 /* UDataInfo cf. udata.h */
901 static UDataInfo dataInfo = {
902 sizeof(UDataInfo),
903 0,
904
905 U_IS_BIG_ENDIAN,
906 U_CHARSET_FAMILY,
907 sizeof(UChar),
908 0,
909
910 {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
911 {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
912 {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
913 };
914
915 class genpname {
916
917 // command-line options
918 UBool useCopyright;
919 UBool verbose;
920 int32_t debug;
921
922 public:
923 int MMain(int argc, char *argv[]);
924
925 private:
926 NameToEnumEntry* createNameIndex(const AliasList& list,
927 int32_t& nameIndexCount);
928
929 EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
930
931 int32_t writeDataFile(const char *destdir, const Builder&);
932 };
933
934 int main(int argc, char *argv[]) {
935 UErrorCode status = U_ZERO_ERROR;
936 u_init(&status);
937 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
938 // Note: u_init() will try to open ICU property data.
939 // failures here are expected when building ICU from scratch.
940 // ignore them.
941 fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n",
942 u_errorName(status));
943 exit(1);
944 }
945
946 genpname app;
947 U_MAIN_INIT_ARGS(argc, argv);
948 int retVal = app.MMain(argc, argv);
949 u_cleanup();
950 return retVal;
951 }
952
953 static UOption options[]={
954 UOPTION_HELP_H,
955 UOPTION_HELP_QUESTION_MARK,
956 UOPTION_COPYRIGHT,
957 UOPTION_DESTDIR,
958 UOPTION_VERBOSE,
959 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
960 };
961
962 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
963 int32_t& nameIndexCount) {
964
965 // Build name => enum map
966
967 // This is an n->1 map. There are typically multiple names
968 // mapping to one enum. The name index is sorted in order of the name,
969 // as defined by the uprv_compareAliasNames() function.
970
971 int32_t i, j;
972 int32_t count = list.count();
973
974 // compute upper limit on number of names in the index
975 int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
976 NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
977
978 nameIndexCount = 0;
979 int32_t names[MAX_NAMES_PER_GROUP];
980 for (i=0; i<count; ++i) {
981 const Alias& p = list[i];
982 int32_t n = p.getUniqueNames(names);
983 for (j=0; j<n; ++j) {
984 U_ASSERT(nameIndexCount < nameIndexCapacity);
985 nameIndex[nameIndexCount++] =
986 NameToEnumEntry(names[j], p.enumValue);
987 }
988 }
989
990 /*
991 * use a stable sort to ensure consistent results between
992 * genpname.cpp and the propname.cpp swapping code
993 */
994 UErrorCode errorCode = U_ZERO_ERROR;
995 uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
996 compareNameToEnumEntry, NULL, TRUE, &errorCode);
997 if (debug>1) {
998 printf("Alias names: %d\n", (int)nameIndexCount);
999 for (i=0; i<nameIndexCount; ++i) {
1000 printf("%s => %d\n",
1001 STRING_TABLE[nameIndex[i].nameIndex].str,
1002 (int)nameIndex[i].enumValue);
1003 }
1004 printf("\n");
1005 }
1006 // make sure there are no duplicates. for a sorted list we need
1007 // only compare adjacent items. Alias.getUniqueNames() has
1008 // already eliminated duplicate names for a single property, which
1009 // does occur, so we're checking for duplicate names between two
1010 // properties, which should never occur.
1011 UBool ok = TRUE;
1012 for (i=1; i<nameIndexCount; ++i) {
1013 if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1014 STRING_TABLE[nameIndex[i].nameIndex]) {
1015 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1016 STRING_TABLE[nameIndex[i-1].nameIndex].str,
1017 STRING_TABLE[nameIndex[i].nameIndex].str);
1018 ok = FALSE;
1019 }
1020 }
1021 if (!ok) {
1022 die("Two or more duplicate names in property list");
1023 }
1024
1025 return nameIndex;
1026 }
1027
1028 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1029
1030 // Build the enum => name map
1031
1032 // This is a 1->n map. Each enum maps to 1 or more names. To
1033 // accomplish this the index entry points to an element of the
1034 // NAME_GROUP array. This is the short name (which may be empty).
1035 // From there, subsequent elements of NAME_GROUP are alternate
1036 // names for this enum, up to and including the first one that is
1037 // negative (negate for actual index).
1038
1039 int32_t i, j, k;
1040 int32_t count = list.count();
1041
1042 EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1043 for (i=0; i<count; ++i) {
1044 const Alias& p = list[i];
1045 enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1046 }
1047
1048 UErrorCode errorCode = U_ZERO_ERROR;
1049 uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1050 compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1051 if (debug>1) {
1052 printf("Property enums: %d\n", (int)count);
1053 for (i=0; i<count; ++i) {
1054 printf("%d => %d: ",
1055 (int)enumIndex[i].enumValue,
1056 (int)enumIndex[i].nameGroupIndex);
1057 UBool done = FALSE;
1058 for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1059 k = NAME_GROUP[j];
1060 if (k < 0) {
1061 k = -k;
1062 done = TRUE;
1063 }
1064 printf("\"%s\"", STRING_TABLE[k].str);
1065 if (!done) printf(", ");
1066 }
1067 printf("\n");
1068 }
1069 printf("\n");
1070 }
1071 return enumIndex;
1072 }
1073
1074 int genpname::MMain(int argc, char* argv[])
1075 {
1076 int32_t i, j;
1077 UErrorCode status = U_ZERO_ERROR;
1078
1079 u_init(&status);
1080 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1081 fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1082 status = U_ZERO_ERROR;
1083 }
1084
1085
1086 /* preset then read command line options */
1087 options[3].value=u_getDataDirectory();
1088 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1089
1090 /* error handling, printing usage message */
1091 if (argc<0) {
1092 fprintf(stderr,
1093 "error in command line argument \"%s\"\n",
1094 argv[-argc]);
1095 }
1096
1097 debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1098
1099 if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1100 debug < 0 || debug > 9) {
1101 fprintf(stderr,
1102 "usage: %s [-options]\n"
1103 "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1104 "options:\n"
1105 "\t-h or -? or --help this usage text\n"
1106 "\t-v or --verbose turn on verbose output\n"
1107 "\t-c or --copyright include a copyright notice\n"
1108 "\t-d or --destdir destination directory, followed by the path\n"
1109 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1110 argv[0]);
1111 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1112 }
1113
1114 /* get the options values */
1115 useCopyright=options[2].doesOccur;
1116 verbose = options[4].doesOccur;
1117
1118 // ------------------------------------------------------------
1119 // Do not sort the string table, instead keep it in data.h order.
1120 // This simplifies data swapping and testing thereof because the string
1121 // table itself need not be sorted during swapping.
1122 // The NameToEnum sorter sorts each such map's string offsets instead.
1123
1124 if (debug>1) {
1125 printf("String pool: %d\n", (int)STRING_COUNT);
1126 for (i=0; i<STRING_COUNT; ++i) {
1127 if (i != 0) {
1128 printf(", ");
1129 }
1130 printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1131 }
1132 printf("\n\n");
1133 }
1134
1135 // ------------------------------------------------------------
1136 // Create top-level property indices
1137
1138 PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1139 int32_t propNameCount;
1140 NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1141 EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1142
1143 // ------------------------------------------------------------
1144 // Create indices for the value list for each enumerated property
1145
1146 // This will have more entries than we need...
1147 EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1148 int32_t enumToValue_count = 0;
1149 for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1150 if (PROPERTY[i].valueCount == 0) continue;
1151 AliasArrayList values(PROPERTY[i].valueList,
1152 PROPERTY[i].valueCount);
1153 enumToValue[j].enumValue = PROPERTY[i].enumValue;
1154 enumToValue[j].enumToName = createEnumIndex(values);
1155 enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1156 enumToValue[j].nameToEnum = createNameIndex(values,
1157 enumToValue[j].nameToEnum_count);
1158 ++j;
1159 }
1160 enumToValue_count = j;
1161
1162 uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1163 compareEnumToValueEntry, NULL, FALSE, &status);
1164
1165 // ------------------------------------------------------------
1166 // Build PropertyAliases layout in memory
1167
1168 Builder builder(debug);
1169
1170 builder.buildTopLevelProperties(propName,
1171 propNameCount,
1172 propEnum,
1173 PROPERTY_COUNT);
1174
1175 builder.buildValues(enumToValue,
1176 enumToValue_count);
1177
1178 builder.buildStringPool(STRING_TABLE,
1179 STRING_COUNT,
1180 NAME_GROUP,
1181 NAME_GROUP_COUNT);
1182
1183 builder.fixup();
1184
1185 ////////////////////////////////////////////////////////////
1186 // Write the output file
1187 ////////////////////////////////////////////////////////////
1188 int32_t wlen = writeDataFile(options[3].value, builder);
1189 if (verbose) {
1190 fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1191 U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1192 }
1193
1194 return 0; // success
1195 }
1196
1197 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1198 int32_t length;
1199 int8_t* data = builder.createData(length);
1200
1201 UNewDataMemory *pdata;
1202 UErrorCode status = U_ZERO_ERROR;
1203
1204 pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1205 useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1206 if (U_FAILURE(status)) {
1207 die("Unable to create data memory");
1208 }
1209
1210 udata_writeBlock(pdata, data, length);
1211
1212 int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1213 if (U_FAILURE(status)) {
1214 die("Error writing output file");
1215 }
1216 if (dataLength != length) {
1217 die("Written file doesn't match expected size");
1218 }
1219
1220 return dataLength;
1221 }
1222
1223 //eof