]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genpname/genpname.cpp
ICU-6.2.10.tar.gz
[apple/icu.git] / icuSources / tools / genpname / genpname.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2002-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "filestrm.h"
17 #include "uarrsort.h"
18 #include "unewdata.h"
19 #include "uoptions.h"
20 #include "uprops.h"
21 #include "propname.h"
22 #include "uassert.h"
23
24 #include <stdio.h>
25
26 // TODO: Clean up and comment this code.
27
28 //----------------------------------------------------------------------
29 // BEGIN DATA
30 //
31 // This is the raw data to be output. We define the data structure,
32 // then include a machine-generated header that contains the actual
33 // data.
34
35 #include "unicode/uchar.h"
36 #include "unicode/uscript.h"
37 #include "unicode/unorm.h"
38
39 class AliasName {
40 public:
41 const char* str;
42 int32_t index;
43
44 AliasName(const char* str, int32_t index);
45
46 int compare(const AliasName& other) const;
47
48 UBool operator==(const AliasName& other) const {
49 return compare(other) == 0;
50 }
51
52 UBool operator!=(const AliasName& other) const {
53 return compare(other) != 0;
54 }
55 };
56
57 AliasName::AliasName(const char* _str,
58 int32_t _index) :
59 str(_str),
60 index(_index)
61 {
62 }
63
64 int AliasName::compare(const AliasName& other) const {
65 return uprv_comparePropertyNames(str, other.str);
66 }
67
/**
 * An enum value together with the index of its first entry in the
 * NAME_GROUP array (see getUniqueNames() for how the group is walked).
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    /** Fill nameGroupIndices with the distinct names; returns how many. */
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};

Alias::Alias(int32_t value, int32_t groupIndex)
    : enumValue(value), nameGroupIndex(groupIndex)
{
}
85
86 class Property : public Alias {
87 public:
88 int32_t valueCount;
89 const Alias* valueList;
90
91 Property(int32_t enumValue,
92 int32_t nameGroupIndex,
93 int32_t valueCount,
94 const Alias* valueList);
95 };
96
97 Property::Property(int32_t _enumValue,
98 int32_t _nameGroupIndex,
99 int32_t _valueCount,
100 const Alias* _valueList) :
101 Alias(_enumValue, _nameGroupIndex),
102 valueCount(_valueCount),
103 valueList(_valueList)
104 {
105 }
106
107 // *** Include the data header ***
108 #include "data.h"
109
110 /* return a list of unique names, not including "", for this property
111 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
112 * elements, will be filled with indices into STRING_TABLE
113 * @return number of indices, >= 1
114 */
115 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
116 int32_t count = 0;
117 int32_t i = nameGroupIndex;
118 UBool done = FALSE;
119 while (!done) {
120 int32_t j = NAME_GROUP[i++];
121 if (j < 0) {
122 done = TRUE;
123 j = -j;
124 }
125 if (j == 0) continue; // omit "" entries
126 UBool dupe = FALSE;
127 for (int32_t k=0; k<count; ++k) {
128 if (stringIndices[k] == j) {
129 dupe = TRUE;
130 break;
131 }
132 // also do a string check for things like "age|Age"
133 if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
134 //printf("Found dupe %s|%s\n",
135 // STRING_TABLE[stringIndices[k]].str,
136 // STRING_TABLE[j].str);
137 dupe = TRUE;
138 break;
139 }
140 }
141 if (dupe) continue; // omit duplicates
142 stringIndices[count++] = j;
143 }
144 return count;
145 }
146
147 // END DATA
148 //----------------------------------------------------------------------
149
// Allocate an uninitialized array of `count` objects of `type` through
// uprv_malloc().  `count` is parenthesized so that an expression argument
// such as `n + 1` is multiplied by the element size as a whole, and the
// entire expansion is parenthesized so it can be used inside larger
// expressions.
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
152
// Report a fatal error on stderr as "Error: <msg>" and terminate the
// process with exit status 1.  Does not return.
void die(const char* msg) {
    const int kFailureStatus = 1;
    fprintf(stderr, "Error: %s\n", msg);
    exit(kFailureStatus);
}
157
158 //----------------------------------------------------------------------
159
160 /**
161 * A list of Alias objects.
162 */
163 class AliasList {
164 public:
165 virtual const Alias& operator[](int32_t i) const = 0;
166 virtual int32_t count() const = 0;
167 };
168
169 /**
170 * A single array.
171 */
172 class AliasArrayList : public AliasList {
173 const Alias* a;
174 int32_t n;
175 public:
176 AliasArrayList(const Alias* _a, int32_t _n) {
177 a = _a;
178 n = _n;
179 }
180 virtual const Alias& operator[](int32_t i) const {
181 return a[i];
182 }
183 virtual int32_t count() const {
184 return n;
185 }
186 };
187
188 /**
189 * A single array.
190 */
191 class PropertyArrayList : public AliasList {
192 const Property* a;
193 int32_t n;
194 public:
195 PropertyArrayList(const Property* _a, int32_t _n) {
196 a = _a;
197 n = _n;
198 }
199 virtual const Alias& operator[](int32_t i) const {
200 return a[i];
201 }
202 virtual int32_t count() const {
203 return n;
204 }
205 };
206
207 //----------------------------------------------------------------------
208
/**
 * One element of a name index: associates a name (identified by its
 * index) with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    NameToEnumEntry(int32_t a, int32_t b)
        : nameIndex(a), enumValue(b) {}
};
219
220 // Sort function for NameToEnumEntry (sort by name)
221 U_CFUNC int32_t
222 compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
223 return
224 STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
225 compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
226 }
227
228 //----------------------------------------------------------------------
229
230 /**
231 * An element in an enum index. It maps an enum into a name group entry
232 * (given by index).
233 */
234 class EnumToNameGroupEntry {
235 public:
236 int32_t enumValue;
237 int32_t nameGroupIndex;
238 EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
239
240 // are enumValues contiguous for count entries starting with this one?
241 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
242 UBool isContiguous(int32_t count) const {
243 const EnumToNameGroupEntry* p = this;
244 for (int32_t i=1; i<count; ++i) {
245 if (p[i].enumValue != (this->enumValue + i)) {
246 return FALSE;
247 }
248 }
249 return TRUE;
250 }
251 };
252
253 // Sort function for EnumToNameGroupEntry (sort by name index)
254 U_CFUNC int32_t
255 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
256 return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
257 }
258
259 //----------------------------------------------------------------------
260
261 /**
262 * An element in the map from enumerated property enums to value maps.
263 */
264 class EnumToValueEntry {
265 public:
266 int32_t enumValue;
267 EnumToNameGroupEntry* enumToName;
268 int32_t enumToName_count;
269 NameToEnumEntry* nameToEnum;
270 int32_t nameToEnum_count;
271
272 // are enumValues contiguous for count entries starting with this one?
273 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
274 UBool isContiguous(int32_t count) const {
275 const EnumToValueEntry* p = this;
276 for (int32_t i=1; i<count; ++i) {
277 if (p[i].enumValue != (this->enumValue + i)) {
278 return FALSE;
279 }
280 }
281 return TRUE;
282 }
283 };
284
285 // Sort function for EnumToValueEntry (sort by enum)
286 U_CFUNC int32_t
287 compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
288 return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
289 }
290
291 //----------------------------------------------------------------------
292 // BEGIN Builder
293
// True when x lies in the closed range 0..MAX_OFFSET.
#define IS_VALID_OFFSET(x) ((x) >= 0 && (x) <= MAX_OFFSET)
295
296 class Builder {
297 // header:
298 PropertyAliases header;
299
300 // 0:
301 NonContiguousEnumToOffset* enumToName;
302 int32_t enumToName_size;
303 Offset enumToName_offset;
304
305 // 1: (deleted)
306
307 // 2:
308 NameToEnum* nameToEnum;
309 int32_t nameToEnum_size;
310 Offset nameToEnum_offset;
311
312 // 3:
313 NonContiguousEnumToOffset* enumToValue;
314 int32_t enumToValue_size;
315 Offset enumToValue_offset;
316
317 // 4:
318 ValueMap* valueMap;
319 int32_t valueMap_size;
320 int32_t valueMap_count;
321 Offset valueMap_offset;
322
323 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
324 // NULL and one is not. valueEnumToName_size[i] is the size of
325 // the non-NULL one. i=0..valueMapCount-1
326 // 5a:
327 EnumToOffset** valueEnumToName;
328 // 5b:
329 NonContiguousEnumToOffset** valueNCEnumToName;
330 int32_t* valueEnumToName_size;
331 Offset* valueEnumToName_offset;
332 // 6:
333 // arrays of valueMap_count pointers, sizes, & offsets
334 NameToEnum** valueNameToEnum;
335 int32_t* valueNameToEnum_size;
336 Offset* valueNameToEnum_offset;
337
338 // 98:
339 Offset* nameGroupPool;
340 int32_t nameGroupPool_count;
341 int32_t nameGroupPool_size;
342 Offset nameGroupPool_offset;
343
344 // 99:
345 char* stringPool;
346 int32_t stringPool_count;
347 int32_t stringPool_size;
348 Offset stringPool_offset;
349 Offset* stringPool_offsetArray; // relative to stringPool
350
351 int32_t total_size; // size of everything
352
353 int32_t debug;
354
355 public:
356
357 Builder(int32_t debugLevel);
358 ~Builder();
359
360 void buildTopLevelProperties(const NameToEnumEntry* propName,
361 int32_t propNameCount,
362 const EnumToNameGroupEntry* propEnum,
363 int32_t propEnumCount);
364
365 void buildValues(const EnumToValueEntry* e2v,
366 int32_t count);
367
368 void buildStringPool(const AliasName* propertyNames,
369 int32_t propertyNameCount,
370 const int32_t* nameGroupIndices,
371 int32_t nameGroupIndicesCount);
372
373 void fixup();
374
375 int8_t* createData(int32_t& length) const;
376
377 private:
378
379 static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
380 int32_t count,
381 int32_t& size);
382 static NonContiguousEnumToOffset*
383 buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
384 int32_t count,
385 int32_t& size);
386
387 static NonContiguousEnumToOffset*
388 buildNCEnumToValue(const EnumToValueEntry* e2v,
389 int32_t count,
390 int32_t& size);
391
392 static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
393 int32_t count,
394 int32_t& size);
395
396 Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
397 void fixupNameToEnum(NameToEnum* n);
398 void fixupEnumToNameGroup(EnumToOffset* e2ng);
399 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
400
401 void computeOffsets();
402 void fixupStringPoolOffsets();
403 void fixupNameGroupPoolOffsets();
404 void fixupMiscellaneousOffsets();
405
406 static int32_t align(int32_t a);
407 static void erase(void* p, int32_t size);
408 };
409
410 Builder::Builder(int32_t debugLevel) {
411 debug = debugLevel;
412 enumToName = 0;
413 nameToEnum = 0;
414 enumToValue = 0;
415 valueMap_count = 0;
416 valueMap = 0;
417 valueEnumToName = 0;
418 valueNCEnumToName = 0;
419 valueEnumToName_size = 0;
420 valueEnumToName_offset = 0;
421 valueNameToEnum = 0;
422 valueNameToEnum_size = 0;
423 valueNameToEnum_offset = 0;
424 nameGroupPool = 0;
425 stringPool = 0;
426 stringPool_offsetArray = 0;
427 }
428
429 Builder::~Builder() {
430 uprv_free(enumToName);
431 uprv_free(nameToEnum);
432 uprv_free(enumToValue);
433 uprv_free(valueMap);
434 for (int32_t i=0; i<valueMap_count; ++i) {
435 uprv_free(valueEnumToName[i]);
436 uprv_free(valueNCEnumToName[i]);
437 uprv_free(valueNameToEnum[i]);
438 }
439 uprv_free(valueEnumToName);
440 uprv_free(valueNCEnumToName);
441 uprv_free(valueEnumToName_size);
442 uprv_free(valueEnumToName_offset);
443 uprv_free(valueNameToEnum);
444 uprv_free(valueNameToEnum_size);
445 uprv_free(valueNameToEnum_offset);
446 uprv_free(nameGroupPool);
447 uprv_free(stringPool);
448 uprv_free(stringPool_offsetArray);
449 }
450
451 int32_t Builder::align(int32_t a) {
452 U_ASSERT(a >= 0);
453 int32_t k = a % sizeof(int32_t);
454 if (k == 0) {
455 return a;
456 }
457 a += sizeof(int32_t) - k;
458 return a;
459 }
460
461 void Builder::erase(void* p, int32_t size) {
462 U_ASSERT(size >= 0);
463 int8_t* q = (int8_t*) p;
464 while (size--) {
465 *q++ = 0;
466 }
467 }
468
469 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
470 int32_t count,
471 int32_t& size) {
472 U_ASSERT(e2ng->isContiguous(count));
473 size = align(EnumToOffset::getSize(count));
474 EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
475 erase(result, size);
476 result->enumStart = e2ng->enumValue;
477 result->enumLimit = e2ng->enumValue + count;
478 Offset* p = result->getOffsetArray();
479 for (int32_t i=0; i<count; ++i) {
480 // set these to NGI index values
481 // fix them up to NGI offset values
482 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
483 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
484 }
485 return result;
486 }
487
488 NonContiguousEnumToOffset*
489 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
490 int32_t count,
491 int32_t& size) {
492 U_ASSERT(!e2ng->isContiguous(count));
493 size = align(NonContiguousEnumToOffset::getSize(count));
494 NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
495 erase(nc, size);
496 nc->count = count;
497 EnumValue* e = nc->getEnumArray();
498 Offset* p = nc->getOffsetArray();
499 for (int32_t i=0; i<count; ++i) {
500 // set these to NGI index values
501 // fix them up to NGI offset values
502 e[i] = e2ng[i].enumValue;
503 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
504 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
505 }
506 return nc;
507 }
508
509 NonContiguousEnumToOffset*
510 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
511 int32_t count,
512 int32_t& size) {
513 U_ASSERT(!e2v->isContiguous(count));
514 size = align(NonContiguousEnumToOffset::getSize(count));
515 NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
516 erase(result, size);
517 result->count = count;
518 EnumValue* e = result->getEnumArray();
519 for (int32_t i=0; i<count; ++i) {
520 e[i] = e2v[i].enumValue;
521 // offset must be set later
522 }
523 return result;
524 }
525
526 /**
527 * Given an index into the string pool, return an offset. computeOffsets()
528 * must have been called already. If allowNegative is true, allow negatives
529 * and preserve their sign.
530 */
531 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
532 // Index 0 is ""; we turn this into an Offset of zero
533 if (index == 0) return 0;
534 if (index < 0) {
535 if (allowNegative) {
536 return -Builder::stringIndexToOffset(-index);
537 } else {
538 die("Negative string pool index");
539 }
540 } else {
541 if (index >= stringPool_count) {
542 die("String pool index too large");
543 }
544 Offset result = stringPool_offset + stringPool_offsetArray[index];
545 U_ASSERT(result >= 0 && result < total_size);
546 return result;
547 }
548 return 0; // never executed; make compiler happy
549 }
550
551 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
552 int32_t count,
553 int32_t& size) {
554 size = align(NameToEnum::getSize(count));
555 NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
556 erase(n2e, size);
557 n2e->count = count;
558 Offset* p = n2e->getNameArray();
559 EnumValue* e = n2e->getEnumArray();
560 for (int32_t i=0; i<count; ++i) {
561 // set these to SP index values
562 // fix them up to SP offset values
563 U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
564 p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
565 e[i] = nameToEnum[i].enumValue;
566 }
567 return n2e;
568 }
569
570
571 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
572 int32_t propNameCount,
573 const EnumToNameGroupEntry* propEnum,
574 int32_t propEnumCount) {
575 enumToName = buildNCEnumToNameGroup(propEnum,
576 propEnumCount,
577 enumToName_size);
578 nameToEnum = buildNameToEnum(propName,
579 propNameCount,
580 nameToEnum_size);
581 }
582
583 void Builder::buildValues(const EnumToValueEntry* e2v,
584 int32_t count) {
585 int32_t i;
586
587 U_ASSERT(!e2v->isContiguous(count));
588
589 valueMap_count = count;
590
591 enumToValue = buildNCEnumToValue(e2v, count,
592 enumToValue_size);
593
594 valueMap_size = align(count * sizeof(ValueMap));
595 valueMap = (ValueMap*) uprv_malloc(valueMap_size);
596 erase(valueMap, valueMap_size);
597
598 valueEnumToName = MALLOC(EnumToOffset*, count);
599 valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
600 valueEnumToName_size = MALLOC(int32_t, count);
601 valueEnumToName_offset = MALLOC(Offset, count);
602 valueNameToEnum = MALLOC(NameToEnum*, count);
603 valueNameToEnum_size = MALLOC(int32_t, count);
604 valueNameToEnum_offset = MALLOC(Offset, count);
605
606 for (i=0; i<count; ++i) {
607 UBool isContiguous =
608 e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
609 valueEnumToName[i] = 0;
610 valueNCEnumToName[i] = 0;
611 if (isContiguous) {
612 valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
613 e2v[i].enumToName_count,
614 valueEnumToName_size[i]);
615 } else {
616 valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
617 e2v[i].enumToName_count,
618 valueEnumToName_size[i]);
619 }
620 valueNameToEnum[i] =
621 buildNameToEnum(e2v[i].nameToEnum,
622 e2v[i].nameToEnum_count,
623 valueNameToEnum_size[i]);
624 }
625 }
626
627 void Builder::buildStringPool(const AliasName* propertyNames,
628 int32_t propertyNameCount,
629 const int32_t* nameGroupIndices,
630 int32_t nameGroupIndicesCount) {
631 int32_t i;
632
633 nameGroupPool_count = nameGroupIndicesCount;
634 nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
635 nameGroupPool = MALLOC(Offset, nameGroupPool_count);
636
637 for (i=0; i<nameGroupPool_count; ++i) {
638 // Some indices are negative.
639 int32_t a = nameGroupIndices[i];
640 if (a < 0) a = -a;
641 U_ASSERT(IS_VALID_OFFSET(a));
642 nameGroupPool[i] = (Offset) nameGroupIndices[i];
643 }
644
645 stringPool_count = propertyNameCount;
646 stringPool_size = 0;
647 // first string must be "" -- we skip it
648 U_ASSERT(*propertyNames[0].str == 0);
649 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
650 stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
651 }
652 stringPool = MALLOC(char, stringPool_size);
653 stringPool_offsetArray = MALLOC(Offset, stringPool_count);
654 Offset soFar = 0;
655 char* p = stringPool;
656 stringPool_offsetArray[0] = -1; // we don't use this entry
657 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
658 const char* str = propertyNames[i].str;
659 int32_t len = (int32_t)uprv_strlen(str);
660 uprv_strcpy(p, str);
661 p += len;
662 *p++ = 0;
663 stringPool_offsetArray[i] = soFar;
664 soFar += (Offset)(len+1);
665 }
666 U_ASSERT(soFar == stringPool_size);
667 U_ASSERT(p == (stringPool + stringPool_size));
668 }
669
670 // Confirm that PropertyAliases is a POD (plain old data; see C++
671 // std). The following union will _fail to compile_ if
672 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
673 // macro to check this, but that's not quite right, so that test is
674 // commented out -- see below.)
675 typedef union {
676 int32_t i;
677 PropertyAliases p;
678 } PropertyAliasesPODTest;
679
680 void Builder::computeOffsets() {
681 int32_t i;
682 Offset off = sizeof(header);
683
684 if (debug>0) {
685 printf("header \t offset=%4d size=%5d\n", 0, off);
686 }
687
688 // PropertyAliases must have no v-table and must be
689 // padded (if necessary) to the next 32-bit boundary.
690 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
691 U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
692
693 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
694
695 #define COMPUTE_OFFSET2(foo,type) \
696 if (debug>0)\
697 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
698 foo##_offset = off;\
699 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
700 U_ASSERT(foo##_offset % sizeof(type) == 0);\
701 off = (Offset) (off + foo##_size);
702
703 COMPUTE_OFFSET(enumToName); // 0:
704 COMPUTE_OFFSET(nameToEnum); // 2:
705 COMPUTE_OFFSET(enumToValue); // 3:
706 COMPUTE_OFFSET(valueMap); // 4:
707
708 for (i=0; i<valueMap_count; ++i) {
709 if (debug>0) {
710 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
711 (int)i, off, (int)valueEnumToName_size[i]);
712 }
713
714 valueEnumToName_offset[i] = off; // 5:
715 U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
716 off = (Offset) (off + valueEnumToName_size[i]);
717
718 if (debug>0) {
719 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
720 (int)i, off, (int)valueNameToEnum_size[i]);
721 }
722
723 valueNameToEnum_offset[i] = off; // 6:
724 U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
725 off = (Offset) (off + valueNameToEnum_size[i]);
726 }
727
728 // These last two chunks have weaker alignment needs
729 COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
730 COMPUTE_OFFSET2(stringPool,char); // 99:
731
732 total_size = off;
733 if (debug>0) printf("total size=%5d\n\n", (int)total_size);
734 U_ASSERT(total_size <= (MAX_OFFSET+1));
735 }
736
737 void Builder::fixupNameToEnum(NameToEnum* n) {
738 // Fix the string pool offsets in n
739 Offset* p = n->getNameArray();
740 for (int32_t i=0; i<n->count; ++i) {
741 p[i] = stringIndexToOffset(p[i]);
742 }
743 }
744
745 void Builder::fixupStringPoolOffsets() {
746 int32_t i;
747
748 // 2:
749 fixupNameToEnum(nameToEnum);
750
751 // 6:
752 for (i=0; i<valueMap_count; ++i) {
753 fixupNameToEnum(valueNameToEnum[i]);
754 }
755
756 // 98:
757 for (i=0; i<nameGroupPool_count; ++i) {
758 nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
759 }
760 }
761
762 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
763 EnumValue i;
764 int32_t j;
765 Offset* p = e2ng->getOffsetArray();
766 for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
767 p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
768 }
769 }
770
771 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
772 int32_t i;
773 /*EnumValue* e = e2ng->getEnumArray();*/
774 Offset* p = e2ng->getOffsetArray();
775 for (i=0; i<e2ng->count; ++i) {
776 p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
777 }
778 }
779
780 void Builder::fixupNameGroupPoolOffsets() {
781 int32_t i;
782
783 // 0:
784 fixupNCEnumToNameGroup(enumToName);
785
786 // 1: (deleted)
787
788 // 5:
789 for (i=0; i<valueMap_count; ++i) {
790 // 5a:
791 if (valueEnumToName[i] != 0) {
792 fixupEnumToNameGroup(valueEnumToName[i]);
793 }
794 // 5b:
795 if (valueNCEnumToName[i] != 0) {
796 fixupNCEnumToNameGroup(valueNCEnumToName[i]);
797 }
798 }
799 }
800
801 void Builder::fixupMiscellaneousOffsets() {
802 int32_t i;
803
804 // header:
805 erase(&header, sizeof(header));
806 header.enumToName_offset = enumToName_offset;
807 header.nameToEnum_offset = nameToEnum_offset;
808 header.enumToValue_offset = enumToValue_offset;
809 // header meta-info used by Java:
810 U_ASSERT(total_size > 0 && total_size < 0x7FFF);
811 header.total_size = (int16_t) total_size;
812 header.valueMap_offset = valueMap_offset;
813 header.valueMap_count = (int16_t) valueMap_count;
814 header.nameGroupPool_offset = nameGroupPool_offset;
815 header.nameGroupPool_count = (int16_t) nameGroupPool_count;
816 header.stringPool_offset = stringPool_offset;
817 header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
818
819 U_ASSERT(valueMap_count <= 0x7FFF);
820 U_ASSERT(nameGroupPool_count <= 0x7FFF);
821 U_ASSERT(stringPool_count <= 0x7FFF);
822
823 // 3:
824 Offset* p = enumToValue->getOffsetArray();
825 /*EnumValue* e = enumToValue->getEnumArray();*/
826 U_ASSERT(valueMap_count == enumToValue->count);
827 for (i=0; i<valueMap_count; ++i) {
828 p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
829 }
830
831 // 4:
832 for (i=0; i<valueMap_count; ++i) {
833 ValueMap& v = valueMap[i];
834 v.enumToName_offset = v.ncEnumToName_offset = 0;
835 if (valueEnumToName[i] != 0) {
836 v.enumToName_offset = valueEnumToName_offset[i];
837 }
838 if (valueNCEnumToName[i] != 0) {
839 v.ncEnumToName_offset = valueEnumToName_offset[i];
840 }
841 v.nameToEnum_offset = valueNameToEnum_offset[i];
842 }
843 }
844
845 void Builder::fixup() {
846 computeOffsets();
847 fixupStringPoolOffsets();
848 fixupNameGroupPoolOffsets();
849 fixupMiscellaneousOffsets();
850 }
851
852 int8_t* Builder::createData(int32_t& length) const {
853 length = total_size;
854 int8_t* result = MALLOC(int8_t, length);
855
856 int8_t* p = result;
857 int8_t* limit = result + length;
858
859 #define APPEND2(x, size) \
860 U_ASSERT((p+size)<=limit); \
861 uprv_memcpy(p, x, size); \
862 p += size
863
864 #define APPEND(x) APPEND2(x, x##_size)
865
866 APPEND2(&header, sizeof(header));
867 APPEND(enumToName);
868 APPEND(nameToEnum);
869 APPEND(enumToValue);
870 APPEND(valueMap);
871
872 for (int32_t i=0; i<valueMap_count; ++i) {
873 U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
874 (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
875 if (valueEnumToName[i] != 0) {
876 APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
877 }
878 if (valueNCEnumToName[i] != 0) {
879 APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
880 }
881 APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
882 }
883
884 APPEND(nameGroupPool);
885 APPEND(stringPool);
886
887 if (p != limit) {
888 fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
889 exit(1);
890 }
891 return result;
892 }
893
894 // END Builder
895 //----------------------------------------------------------------------
896
897 /* UDataInfo cf. udata.h */
898 static UDataInfo dataInfo = {
899 sizeof(UDataInfo),
900 0,
901
902 U_IS_BIG_ENDIAN,
903 U_CHARSET_FAMILY,
904 sizeof(UChar),
905 0,
906
907 {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
908 {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
909 {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
910 };
911
912 class genpname {
913
914 // command-line options
915 UBool useCopyright;
916 UBool verbose;
917 int32_t debug;
918
919 public:
920 int MMain(int argc, char *argv[]);
921
922 private:
923 NameToEnumEntry* createNameIndex(const AliasList& list,
924 int32_t& nameIndexCount);
925
926 EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
927
928 int32_t writeDataFile(const char *destdir, const Builder&);
929 };
930
931 int main(int argc, char *argv[]) {
932 UErrorCode status = U_ZERO_ERROR;
933 u_init(&status);
934 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
935 // Note: u_init() will try to open ICU property data.
936 // failures here are expected when building ICU from scratch.
937 // ignore them.
938 fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n",
939 u_errorName(status));
940 exit(1);
941 }
942
943 genpname app;
944 U_MAIN_INIT_ARGS(argc, argv);
945 int retVal = app.MMain(argc, argv);
946 u_cleanup();
947 return retVal;
948 }
949
950 static UOption options[]={
951 UOPTION_HELP_H,
952 UOPTION_HELP_QUESTION_MARK,
953 UOPTION_COPYRIGHT,
954 UOPTION_DESTDIR,
955 UOPTION_VERBOSE,
956 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
957 };
958
959 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
960 int32_t& nameIndexCount) {
961
962 // Build name => enum map
963
964 // This is an n->1 map. There are typically multiple names
965 // mapping to one enum. The name index is sorted in order of the name,
966 // as defined by the uprv_compareAliasNames() function.
967
968 int32_t i, j;
969 int32_t count = list.count();
970
971 // compute upper limit on number of names in the index
972 int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
973 NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
974
975 nameIndexCount = 0;
976 int32_t names[MAX_NAMES_PER_GROUP];
977 for (i=0; i<count; ++i) {
978 const Alias& p = list[i];
979 int32_t n = p.getUniqueNames(names);
980 for (j=0; j<n; ++j) {
981 U_ASSERT(nameIndexCount < nameIndexCapacity);
982 nameIndex[nameIndexCount++] =
983 NameToEnumEntry(names[j], p.enumValue);
984 }
985 }
986
987 /*
988 * use a stable sort to ensure consistent results between
989 * genpname.cpp and the propname.cpp swapping code
990 */
991 UErrorCode errorCode = U_ZERO_ERROR;
992 uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
993 compareNameToEnumEntry, NULL, TRUE, &errorCode);
994 if (debug>1) {
995 printf("Alias names: %d\n", (int)nameIndexCount);
996 for (i=0; i<nameIndexCount; ++i) {
997 printf("%s => %d\n",
998 STRING_TABLE[nameIndex[i].nameIndex].str,
999 (int)nameIndex[i].enumValue);
1000 }
1001 printf("\n");
1002 }
1003 // make sure there are no duplicates. for a sorted list we need
1004 // only compare adjacent items. Alias.getUniqueNames() has
1005 // already eliminated duplicate names for a single property, which
1006 // does occur, so we're checking for duplicate names between two
1007 // properties, which should never occur.
1008 UBool ok = TRUE;
1009 for (i=1; i<nameIndexCount; ++i) {
1010 if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1011 STRING_TABLE[nameIndex[i].nameIndex]) {
1012 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1013 STRING_TABLE[nameIndex[i-1].nameIndex].str,
1014 STRING_TABLE[nameIndex[i].nameIndex].str);
1015 ok = FALSE;
1016 }
1017 }
1018 if (!ok) {
1019 die("Two or more duplicate names in property list");
1020 }
1021
1022 return nameIndex;
1023 }
1024
1025 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1026
1027 // Build the enum => name map
1028
1029 // This is a 1->n map. Each enum maps to 1 or more names. To
1030 // accomplish this the index entry points to an element of the
1031 // NAME_GROUP array. This is the short name (which may be empty).
1032 // From there, subsequent elements of NAME_GROUP are alternate
1033 // names for this enum, up to and including the first one that is
1034 // negative (negate for actual index).
1035
1036 int32_t i, j, k;
1037 int32_t count = list.count();
1038
1039 EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1040 for (i=0; i<count; ++i) {
1041 const Alias& p = list[i];
1042 enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1043 }
1044
1045 UErrorCode errorCode = U_ZERO_ERROR;
1046 uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1047 compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1048 if (debug>1) {
1049 printf("Property enums: %d\n", (int)count);
1050 for (i=0; i<count; ++i) {
1051 printf("%d => %d: ",
1052 (int)enumIndex[i].enumValue,
1053 (int)enumIndex[i].nameGroupIndex);
1054 UBool done = FALSE;
1055 for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1056 k = NAME_GROUP[j];
1057 if (k < 0) {
1058 k = -k;
1059 done = TRUE;
1060 }
1061 printf("\"%s\"", STRING_TABLE[k].str);
1062 if (!done) printf(", ");
1063 }
1064 printf("\n");
1065 }
1066 printf("\n");
1067 }
1068 return enumIndex;
1069 }
1070
1071 int genpname::MMain(int argc, char* argv[])
1072 {
1073 int32_t i, j;
1074 UErrorCode status = U_ZERO_ERROR;
1075
1076 u_init(&status);
1077 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1078 fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1079 status = U_ZERO_ERROR;
1080 }
1081
1082
1083 /* preset then read command line options */
1084 options[3].value=u_getDataDirectory();
1085 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1086
1087 /* error handling, printing usage message */
1088 if (argc<0) {
1089 fprintf(stderr,
1090 "error in command line argument \"%s\"\n",
1091 argv[-argc]);
1092 }
1093
1094 debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1095
1096 if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1097 debug < 0 || debug > 9) {
1098 fprintf(stderr,
1099 "usage: %s [-options]\n"
1100 "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1101 "options:\n"
1102 "\t-h or -? or --help this usage text\n"
1103 "\t-v or --verbose turn on verbose output\n"
1104 "\t-c or --copyright include a copyright notice\n"
1105 "\t-d or --destdir destination directory, followed by the path\n"
1106 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1107 argv[0]);
1108 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1109 }
1110
1111 /* get the options values */
1112 useCopyright=options[2].doesOccur;
1113 verbose = options[4].doesOccur;
1114
1115 // ------------------------------------------------------------
1116 // Do not sort the string table, instead keep it in data.h order.
1117 // This simplifies data swapping and testing thereof because the string
1118 // table itself need not be sorted during swapping.
1119 // The NameToEnum sorter sorts each such map's string offsets instead.
1120
1121 if (debug>1) {
1122 printf("String pool: %d\n", (int)STRING_COUNT);
1123 for (i=0; i<STRING_COUNT; ++i) {
1124 if (i != 0) {
1125 printf(", ");
1126 }
1127 printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1128 }
1129 printf("\n\n");
1130 }
1131
1132 // ------------------------------------------------------------
1133 // Create top-level property indices
1134
1135 PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1136 int32_t propNameCount;
1137 NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1138 EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1139
1140 // ------------------------------------------------------------
1141 // Create indices for the value list for each enumerated property
1142
1143 // This will have more entries than we need...
1144 EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1145 int32_t enumToValue_count = 0;
1146 for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1147 if (PROPERTY[i].valueCount == 0) continue;
1148 AliasArrayList values(PROPERTY[i].valueList,
1149 PROPERTY[i].valueCount);
1150 enumToValue[j].enumValue = PROPERTY[i].enumValue;
1151 enumToValue[j].enumToName = createEnumIndex(values);
1152 enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1153 enumToValue[j].nameToEnum = createNameIndex(values,
1154 enumToValue[j].nameToEnum_count);
1155 ++j;
1156 }
1157 enumToValue_count = j;
1158
1159 uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1160 compareEnumToValueEntry, NULL, FALSE, &status);
1161
1162 // ------------------------------------------------------------
1163 // Build PropertyAliases layout in memory
1164
1165 Builder builder(debug);
1166
1167 builder.buildTopLevelProperties(propName,
1168 propNameCount,
1169 propEnum,
1170 PROPERTY_COUNT);
1171
1172 builder.buildValues(enumToValue,
1173 enumToValue_count);
1174
1175 builder.buildStringPool(STRING_TABLE,
1176 STRING_COUNT,
1177 NAME_GROUP,
1178 NAME_GROUP_COUNT);
1179
1180 builder.fixup();
1181
1182 ////////////////////////////////////////////////////////////
1183 // Write the output file
1184 ////////////////////////////////////////////////////////////
1185 int32_t wlen = writeDataFile(options[3].value, builder);
1186 if (verbose) {
1187 fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1188 U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1189 }
1190
1191 return 0; // success
1192 }
1193
1194 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1195 int32_t length;
1196 int8_t* data = builder.createData(length);
1197
1198 UNewDataMemory *pdata;
1199 UErrorCode status = U_ZERO_ERROR;
1200
1201 pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1202 useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1203 if (U_FAILURE(status)) {
1204 die("Unable to create data memory");
1205 }
1206
1207 udata_writeBlock(pdata, data, length);
1208
1209 int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1210 if (U_FAILURE(status)) {
1211 die("Error writing output file");
1212 }
1213 if (dataLength != length) {
1214 die("Written file doesn't match expected size");
1215 }
1216
1217 return dataLength;
1218 }
1219
1220 //eof