]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genpname/genpname.cpp
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / tools / genpname / genpname.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2002, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
9 */
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include "unicode/utypes.h"
14 #include "unicode/putil.h"
15 #include "cmemory.h"
16 #include "cstring.h"
17 #include "filestrm.h"
18 #include "unewdata.h"
19 #include "uoptions.h"
20 #include "uprops.h"
21 #include "propname.h"
22 #include "uassert.h"
23
24 // TODO: Clean up and comment this code.
25
26 //----------------------------------------------------------------------
27 // BEGIN DATA
28 //
29 // This is the raw data to be output. We define the data structure,
30 // then include a machine-generated header that contains the actual
31 // data.
32
33 #include "unicode/uchar.h"
34 #include "unicode/uscript.h"
35
36 class AliasName {
37 public:
38 const char* str;
39 int32_t index;
40
41 AliasName(const char* str, int32_t index);
42
43 int compare(const AliasName& other) const;
44
45 UBool operator==(const AliasName& other) const {
46 return compare(other) == 0;
47 }
48
49 UBool operator!=(const AliasName& other) const {
50 return compare(other) != 0;
51 }
52 };
53
54 AliasName::AliasName(const char* _str,
55 int32_t _index) :
56 str(_str),
57 index(_index)
58 {
59 }
60
61 int AliasName::compare(const AliasName& other) const {
62 return uprv_comparePropertyNames(str, other.str);
63 }
64
/**
 * An enum value paired with the position in NAME_GROUP at which the
 * list of its names starts.
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    Alias(int32_t anEnumValue, int32_t aNameGroupIndex)
        : enumValue(anEnumValue), nameGroupIndex(aNameGroupIndex) {}

    // Defined further down, after data.h has been included, because it
    // reads NAME_GROUP and STRING_TABLE.
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};
82
83 class Property : public Alias {
84 public:
85 int32_t valueCount;
86 const Alias* valueList;
87
88 Property(int32_t enumValue,
89 int32_t nameGroupIndex,
90 int32_t valueCount,
91 const Alias* valueList);
92 };
93
94 Property::Property(int32_t _enumValue,
95 int32_t _nameGroupIndex,
96 int32_t _valueCount,
97 const Alias* _valueList) :
98 Alias(_enumValue, _nameGroupIndex),
99 valueCount(_valueCount),
100 valueList(_valueList)
101 {
102 }
103
104 // *** Include the data header ***
105 #include "data.h"
106
107 /* return a list of unique names, not including "", for this property
108 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
109 * elements, will be filled with indices into STRING_TABLE
110 * @return number of indices, >= 1
111 */
112 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
113 int32_t count = 0;
114 int32_t i = nameGroupIndex;
115 UBool done = FALSE;
116 while (!done) {
117 int32_t j = NAME_GROUP[i++];
118 if (j < 0) {
119 done = TRUE;
120 j = -j;
121 }
122 if (j == 0) continue; // omit "" entries
123 UBool dupe = FALSE;
124 for (int32_t k=0; k<count; ++k) {
125 if (stringIndices[k] == j) {
126 dupe = TRUE;
127 break;
128 }
129 // also do a string check for things like "age|Age"
130 if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
131 //printf("Found dupe %s|%s\n",
132 // STRING_TABLE[stringIndices[k]].str,
133 // STRING_TABLE[j].str);
134 dupe = TRUE;
135 break;
136 }
137 }
138 if (dupe) continue; // omit duplicates
139 stringIndices[count++] = j;
140 }
141 return count;
142 }
143
144 // END DATA
145 //----------------------------------------------------------------------
146
// Allocate an uninitialized array of `count` objects of `type` with
// uprv_malloc(). `count` is parenthesized so that an expression
// argument such as MALLOC(T, a + b) is multiplied as a whole.
#define MALLOC(type, count) \
    (type*) uprv_malloc(sizeof(type) * (count))
149
// Report a fatal error on stderr, prefixed with "Error: ", and terminate
// the process with exit status 1. Does not return.
void die(const char* msg) {
    const int failureStatus = 1;
    fprintf(stderr, "Error: %s\n", msg);
    exit(failureStatus);
}
154
155 //----------------------------------------------------------------------
156
157 /**
158 * A list of Alias objects.
159 */
160 class AliasList {
161 public:
162 virtual const Alias& operator[](int32_t i) const = 0;
163 virtual int32_t count() const = 0;
164 };
165
166 /**
167 * A single array.
168 */
169 class AliasArrayList : public AliasList {
170 const Alias* a;
171 int32_t n;
172 public:
173 AliasArrayList(const Alias* _a, int32_t _n) {
174 a = _a;
175 n = _n;
176 }
177 virtual const Alias& operator[](int32_t i) const {
178 return a[i];
179 }
180 virtual int32_t count() const {
181 return n;
182 }
183 };
184
185 /**
186 * A single array.
187 */
188 class PropertyArrayList : public AliasList {
189 const Property* a;
190 int32_t n;
191 public:
192 PropertyArrayList(const Property* _a, int32_t _n) {
193 a = _a;
194 n = _n;
195 }
196 virtual const Alias& operator[](int32_t i) const {
197 return a[i];
198 }
199 virtual int32_t count() const {
200 return n;
201 }
202 };
203
204 //----------------------------------------------------------------------
205
/**
 * One row of a name index: associates a name, identified by its index,
 * with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    // a is the name index, b the enum value.
    NameToEnumEntry(int32_t a, int32_t b) : nameIndex(a), enumValue(b) {}
};
216
217 // Sort function for NameToEnumEntry (sort by name index)
218 U_CAPI int compareNameToEnumEntry(const void* e1, const void* e2) {
219 return ((NameToEnumEntry*)e1)->nameIndex - ((NameToEnumEntry*)e2)->nameIndex;
220 }
221
222 //----------------------------------------------------------------------
223
224 /**
225 * An element in an enum index. It maps an enum into a name group entry
226 * (given by index).
227 */
228 class EnumToNameGroupEntry {
229 public:
230 int32_t enumValue;
231 int32_t nameGroupIndex;
232 EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
233
234 // are enumValues contiguous for count entries starting with this one?
235 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
236 UBool isContiguous(int32_t count) const {
237 const EnumToNameGroupEntry* p = this;
238 for (int32_t i=1; i<count; ++i) {
239 if (p[i].enumValue != (this->enumValue + i)) {
240 return FALSE;
241 }
242 }
243 return TRUE;
244 }
245 };
246
247 // Sort function for EnumToNameGroupEntry (sort by name index)
248 U_CAPI int compareEnumToNameGroupEntry(const void* e1, const void* e2) {
249 return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
250 }
251
252 //----------------------------------------------------------------------
253
254 /**
255 * An element in the map from enumerated property enums to value maps.
256 */
257 class EnumToValueEntry {
258 public:
259 int32_t enumValue;
260 EnumToNameGroupEntry* enumToName;
261 int32_t enumToName_count;
262 NameToEnumEntry* nameToEnum;
263 int32_t nameToEnum_count;
264
265 // are enumValues contiguous for count entries starting with this one?
266 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
267 UBool isContiguous(int32_t count) const {
268 const EnumToValueEntry* p = this;
269 for (int32_t i=1; i<count; ++i) {
270 if (p[i].enumValue != (this->enumValue + i)) {
271 return FALSE;
272 }
273 }
274 return TRUE;
275 }
276 };
277
278 // Sort function for EnumToValueEntry (sort by enum)
279 U_CAPI int compareEnumToValueEntry(const void* e1, const void* e2) {
280 return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
281 }
282
283 //----------------------------------------------------------------------
284 // BEGIN Builder
285
286 #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
287
288 class Builder {
289 // header:
290 PropertyAliases header;
291
292 // 0:
293 NonContiguousEnumToOffset* enumToName;
294 int32_t enumToName_size;
295 Offset enumToName_offset;
296
297 // 1: (deleted)
298
299 // 2:
300 NameToEnum* nameToEnum;
301 int32_t nameToEnum_size;
302 Offset nameToEnum_offset;
303
304 // 3:
305 NonContiguousEnumToOffset* enumToValue;
306 int32_t enumToValue_size;
307 Offset enumToValue_offset;
308
309 // 4:
310 ValueMap* valueMap;
311 int32_t valueMap_size;
312 int32_t valueMap_count;
313 Offset valueMap_offset;
314
315 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
316 // NULL and one is not. valueEnumToName_size[i] is the size of
317 // the non-NULL one. i=0..valueMapCount-1
318 // 5a:
319 EnumToOffset** valueEnumToName;
320 // 5b:
321 NonContiguousEnumToOffset** valueNCEnumToName;
322 int32_t* valueEnumToName_size;
323 Offset* valueEnumToName_offset;
324 // 6:
325 // arrays of valueMap_count pointers, sizes, & offsets
326 NameToEnum** valueNameToEnum;
327 int32_t* valueNameToEnum_size;
328 Offset* valueNameToEnum_offset;
329
330 // 98:
331 Offset* nameGroupPool;
332 int32_t nameGroupPool_count;
333 int32_t nameGroupPool_size;
334 Offset nameGroupPool_offset;
335
336 // 99:
337 char* stringPool;
338 int32_t stringPool_count;
339 int32_t stringPool_size;
340 Offset stringPool_offset;
341 Offset* stringPool_offsetArray; // relative to stringPool
342
343 int32_t total_size; // size of everything
344
345 int32_t debug;
346
347 public:
348
349 Builder(int32_t debugLevel);
350 ~Builder();
351
352 void buildTopLevelProperties(const NameToEnumEntry* propName,
353 int32_t propNameCount,
354 const EnumToNameGroupEntry* propEnum,
355 int32_t propEnumCount);
356
357 void buildValues(const EnumToValueEntry* e2v,
358 int32_t count);
359
360 void buildStringPool(const AliasName* propertyNames,
361 int32_t propertyNameCount,
362 const int32_t* nameGroupIndices,
363 int32_t nameGroupIndicesCount);
364
365 void fixup();
366
367 int8_t* createData(int32_t& length) const;
368
369 private:
370
371 static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
372 int32_t count,
373 int32_t& size);
374 static NonContiguousEnumToOffset*
375 buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
376 int32_t count,
377 int32_t& size);
378
379 static NonContiguousEnumToOffset*
380 buildNCEnumToValue(const EnumToValueEntry* e2v,
381 int32_t count,
382 int32_t& size);
383
384 static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
385 int32_t count,
386 int32_t& size);
387
388 Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
389 void fixupNameToEnum(NameToEnum* n);
390 void fixupEnumToNameGroup(EnumToOffset* e2ng);
391 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
392
393 void computeOffsets();
394 void fixupStringPoolOffsets();
395 void fixupNameGroupPoolOffsets();
396 void fixupMiscellaneousOffsets();
397
398 static int32_t align(int32_t a);
399 static void erase(void* p, int32_t size);
400 };
401
402 Builder::Builder(int32_t debugLevel) {
403 debug = debugLevel;
404 enumToName = 0;
405 nameToEnum = 0;
406 enumToValue = 0;
407 valueMap_count = 0;
408 valueMap = 0;
409 valueEnumToName = 0;
410 valueNCEnumToName = 0;
411 valueEnumToName_size = 0;
412 valueEnumToName_offset = 0;
413 valueNameToEnum = 0;
414 valueNameToEnum_size = 0;
415 valueNameToEnum_offset = 0;
416 nameGroupPool = 0;
417 stringPool = 0;
418 stringPool_offsetArray = 0;
419 }
420
421 Builder::~Builder() {
422 uprv_free(enumToName);
423 uprv_free(nameToEnum);
424 uprv_free(enumToValue);
425 uprv_free(valueMap);
426 for (int32_t i=0; i<valueMap_count; ++i) {
427 uprv_free(valueEnumToName[i]);
428 uprv_free(valueNCEnumToName[i]);
429 uprv_free(valueNameToEnum[i]);
430 }
431 uprv_free(valueEnumToName);
432 uprv_free(valueNCEnumToName);
433 uprv_free(valueEnumToName_size);
434 uprv_free(valueEnumToName_offset);
435 uprv_free(valueNameToEnum);
436 uprv_free(valueNameToEnum_size);
437 uprv_free(valueNameToEnum_offset);
438 uprv_free(nameGroupPool);
439 uprv_free(stringPool);
440 uprv_free(stringPool_offsetArray);
441 }
442
443 int32_t Builder::align(int32_t a) {
444 U_ASSERT(a >= 0);
445 int32_t k = a % sizeof(int32_t);
446 if (k == 0) {
447 return a;
448 }
449 a += sizeof(int32_t) - k;
450 return a;
451 }
452
453 void Builder::erase(void* p, int32_t size) {
454 U_ASSERT(size >= 0);
455 int8_t* q = (int8_t*) p;
456 while (size--) {
457 *q++ = 0;
458 }
459 }
460
461 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
462 int32_t count,
463 int32_t& size) {
464 U_ASSERT(e2ng->isContiguous(count));
465 size = align(EnumToOffset::getSize(count));
466 EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
467 erase(result, size);
468 result->enumStart = e2ng->enumValue;
469 result->enumLimit = e2ng->enumValue + count;
470 Offset* p = result->getOffsetArray();
471 for (int32_t i=0; i<count; ++i) {
472 // set these to NGI index values
473 // fix them up to NGI offset values
474 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
475 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
476 }
477 return result;
478 }
479
480 NonContiguousEnumToOffset*
481 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
482 int32_t count,
483 int32_t& size) {
484 U_ASSERT(!e2ng->isContiguous(count));
485 size = align(NonContiguousEnumToOffset::getSize(count));
486 NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
487 erase(nc, size);
488 nc->count = count;
489 EnumValue* e = nc->getEnumArray();
490 Offset* p = nc->getOffsetArray();
491 for (int32_t i=0; i<count; ++i) {
492 // set these to NGI index values
493 // fix them up to NGI offset values
494 e[i] = e2ng[i].enumValue;
495 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
496 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
497 }
498 return nc;
499 }
500
501 NonContiguousEnumToOffset*
502 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
503 int32_t count,
504 int32_t& size) {
505 U_ASSERT(!e2v->isContiguous(count));
506 size = align(NonContiguousEnumToOffset::getSize(count));
507 NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
508 erase(result, size);
509 result->count = count;
510 EnumValue* e = result->getEnumArray();
511 for (int32_t i=0; i<count; ++i) {
512 e[i] = e2v[i].enumValue;
513 // offset must be set later
514 }
515 return result;
516 }
517
518 /**
519 * Given an index into the string pool, return an offset. computeOffsets()
520 * must have been called already. If allowNegative is true, allow negatives
521 * and preserve their sign.
522 */
523 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
524 // Index 0 is ""; we turn this into an Offset of zero
525 if (index == 0) return 0;
526 if (index < 0) {
527 if (allowNegative) {
528 return -Builder::stringIndexToOffset(-index);
529 } else {
530 die("Negative string pool index");
531 }
532 } else {
533 if (index >= stringPool_count) {
534 die("String pool index too large");
535 }
536 Offset result = stringPool_offset + stringPool_offsetArray[index];
537 U_ASSERT(result >= 0 && result < total_size);
538 return result;
539 }
540 return 0; // never executed; make compiler happy
541 }
542
543 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
544 int32_t count,
545 int32_t& size) {
546 size = align(NameToEnum::getSize(count));
547 NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
548 erase(n2e, size);
549 n2e->count = count;
550 Offset* p = n2e->getNameArray();
551 EnumValue* e = n2e->getEnumArray();
552 for (int32_t i=0; i<count; ++i) {
553 // set these to SP index values
554 // fix them up to SP offset values
555 U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
556 p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
557 e[i] = nameToEnum[i].enumValue;
558 }
559 return n2e;
560 }
561
562
563 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
564 int32_t propNameCount,
565 const EnumToNameGroupEntry* propEnum,
566 int32_t propEnumCount) {
567 enumToName = buildNCEnumToNameGroup(propEnum,
568 propEnumCount,
569 enumToName_size);
570 nameToEnum = buildNameToEnum(propName,
571 propNameCount,
572 nameToEnum_size);
573 }
574
575 void Builder::buildValues(const EnumToValueEntry* e2v,
576 int32_t count) {
577 int32_t i;
578
579 U_ASSERT(!e2v->isContiguous(count));
580
581 valueMap_count = count;
582
583 enumToValue = buildNCEnumToValue(e2v, count,
584 enumToValue_size);
585
586 valueMap_size = align(count * sizeof(ValueMap));
587 valueMap = (ValueMap*) uprv_malloc(valueMap_size);
588 erase(valueMap, valueMap_size);
589
590 valueEnumToName = MALLOC(EnumToOffset*, count);
591 valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
592 valueEnumToName_size = MALLOC(int32_t, count);
593 valueEnumToName_offset = MALLOC(Offset, count);
594 valueNameToEnum = MALLOC(NameToEnum*, count);
595 valueNameToEnum_size = MALLOC(int32_t, count);
596 valueNameToEnum_offset = MALLOC(Offset, count);
597
598 for (i=0; i<count; ++i) {
599 UBool isContiguous =
600 e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
601 valueEnumToName[i] = 0;
602 valueNCEnumToName[i] = 0;
603 if (isContiguous) {
604 valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
605 e2v[i].enumToName_count,
606 valueEnumToName_size[i]);
607 } else {
608 valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
609 e2v[i].enumToName_count,
610 valueEnumToName_size[i]);
611 }
612 valueNameToEnum[i] =
613 buildNameToEnum(e2v[i].nameToEnum,
614 e2v[i].nameToEnum_count,
615 valueNameToEnum_size[i]);
616 }
617 }
618
619 void Builder::buildStringPool(const AliasName* propertyNames,
620 int32_t propertyNameCount,
621 const int32_t* nameGroupIndices,
622 int32_t nameGroupIndicesCount) {
623 int32_t i;
624
625 nameGroupPool_count = nameGroupIndicesCount;
626 nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
627 nameGroupPool = MALLOC(Offset, nameGroupPool_count);
628
629 for (i=0; i<nameGroupPool_count; ++i) {
630 // Some indices are negative.
631 int32_t a = nameGroupIndices[i];
632 if (a < 0) a = -a;
633 U_ASSERT(IS_VALID_OFFSET(a));
634 nameGroupPool[i] = (Offset) nameGroupIndices[i];
635 }
636
637 stringPool_count = propertyNameCount;
638 stringPool_size = 0;
639 // first string must be "" -- we skip it
640 U_ASSERT(*propertyNames[0].str == 0);
641 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
642 stringPool_size += uprv_strlen(propertyNames[i].str) + 1;
643 }
644 stringPool = MALLOC(char, stringPool_size);
645 stringPool_offsetArray = MALLOC(Offset, stringPool_count);
646 Offset soFar = 0;
647 char* p = stringPool;
648 stringPool_offsetArray[0] = -1; // we don't use this entry
649 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
650 const char* str = propertyNames[i].str;
651 int32_t len = uprv_strlen(str);
652 uprv_strcpy(p, str);
653 p += len;
654 *p++ = 0;
655 stringPool_offsetArray[i] = soFar;
656 soFar += (Offset)(len+1);
657 }
658 U_ASSERT(soFar == stringPool_size);
659 U_ASSERT(p == (stringPool + stringPool_size));
660 }
661
662 // Confirm that PropertyAliases is a POD (plain old data; see C++
663 // std). The following union will _fail to compile_ if
664 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
665 // macro to check this, but that's not quite right, so that test is
666 // commented out -- see below.)
667 typedef union {
668 int32_t i;
669 PropertyAliases p;
670 } PropertyAliasesPODTest;
671
672 void Builder::computeOffsets() {
673 int32_t i;
674 Offset off = sizeof(header);
675
676 if (debug>0) {
677 printf("header \t offset=%4d size=%5d\n", 0, off);
678 }
679
680 // PropertyAliases must have no v-table and must be
681 // padded (if necessary) to the next 32-bit boundary.
682 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
683 U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
684
685 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
686
687 #define COMPUTE_OFFSET2(foo,type) \
688 if (debug>0) printf(#foo "\t offset=%4d size=%5d\n", off, foo##_size); \
689 foo##_offset = off; \
690 U_ASSERT(IS_VALID_OFFSET(off + foo##_size)); \
691 U_ASSERT(foo##_offset % sizeof(type) == 0); \
692 off = (Offset) (off + foo##_size);
693
694 COMPUTE_OFFSET(enumToName); // 0:
695 COMPUTE_OFFSET(nameToEnum); // 2:
696 COMPUTE_OFFSET(enumToValue); // 3:
697 COMPUTE_OFFSET(valueMap); // 4:
698
699 for (i=0; i<valueMap_count; ++i) {
700 if (debug>0) {
701 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
702 i, off, valueEnumToName_size[i]);
703 }
704
705 valueEnumToName_offset[i] = off; // 5:
706 U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
707 off = (Offset) (off + valueEnumToName_size[i]);
708
709 if (debug>0) {
710 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
711 i, off, valueNameToEnum_size[i]);
712 }
713
714 valueNameToEnum_offset[i] = off; // 6:
715 U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
716 off = (Offset) (off + valueNameToEnum_size[i]);
717 }
718
719 // These last two chunks have weaker alignment needs
720 COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
721 COMPUTE_OFFSET2(stringPool,char); // 99:
722
723 total_size = off;
724 if (debug>0) printf("total size=%5d\n\n", total_size);
725 U_ASSERT(total_size <= (MAX_OFFSET+1));
726 }
727
728 void Builder::fixupNameToEnum(NameToEnum* n) {
729 // Fix the string pool offsets in n
730 Offset* p = n->getNameArray();
731 for (int32_t i=0; i<n->count; ++i) {
732 p[i] = stringIndexToOffset(p[i]);
733 }
734 }
735
736 void Builder::fixupStringPoolOffsets() {
737 int32_t i;
738
739 // 2:
740 fixupNameToEnum(nameToEnum);
741
742 // 6:
743 for (i=0; i<valueMap_count; ++i) {
744 fixupNameToEnum(valueNameToEnum[i]);
745 }
746
747 // 98:
748 for (i=0; i<nameGroupPool_count; ++i) {
749 nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
750 }
751 }
752
753 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
754 EnumValue i;
755 int32_t j;
756 Offset* p = e2ng->getOffsetArray();
757 for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
758 p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
759 }
760 }
761
762 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
763 int32_t i;
764 EnumValue* e = e2ng->getEnumArray();
765 Offset* p = e2ng->getOffsetArray();
766 for (i=0; i<e2ng->count; ++i) {
767 p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
768 }
769 }
770
771 void Builder::fixupNameGroupPoolOffsets() {
772 int32_t i;
773
774 // 0:
775 fixupNCEnumToNameGroup(enumToName);
776
777 // 1: (deleted)
778
779 // 5:
780 for (i=0; i<valueMap_count; ++i) {
781 // 5a:
782 if (valueEnumToName[i] != 0) {
783 fixupEnumToNameGroup(valueEnumToName[i]);
784 }
785 // 5b:
786 if (valueNCEnumToName[i] != 0) {
787 fixupNCEnumToNameGroup(valueNCEnumToName[i]);
788 }
789 }
790 }
791
792 void Builder::fixupMiscellaneousOffsets() {
793 int32_t i;
794
795 // header:
796 erase(&header, sizeof(header));
797 header.enumToName_offset = enumToName_offset;
798 header.nameToEnum_offset = nameToEnum_offset;
799 header.enumToValue_offset = enumToValue_offset;
800 // header meta-info used by Java:
801 U_ASSERT(total_size > 0 && total_size < 0x7FFF);
802 header.total_size = (int16_t) total_size;
803 header.valueMap_offset = valueMap_offset;
804 header.valueMap_count = (int16_t) valueMap_count;
805 header.nameGroupPool_offset = nameGroupPool_offset;
806 header.nameGroupPool_count = (int16_t) nameGroupPool_count;
807 header.stringPool_offset = stringPool_offset;
808 header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
809
810 U_ASSERT(valueMap_count <= 0x7FFF);
811 U_ASSERT(nameGroupPool_count <= 0x7FFF);
812 U_ASSERT(stringPool_count <= 0x7FFF);
813
814 // 3:
815 Offset* p = enumToValue->getOffsetArray();
816 EnumValue* e = enumToValue->getEnumArray();
817 U_ASSERT(valueMap_count == enumToValue->count);
818 for (i=0; i<valueMap_count; ++i) {
819 p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
820 }
821
822 // 4:
823 for (i=0; i<valueMap_count; ++i) {
824 ValueMap& v = valueMap[i];
825 v.enumToName_offset = v.ncEnumToName_offset = 0;
826 if (valueEnumToName[i] != 0) {
827 v.enumToName_offset = valueEnumToName_offset[i];
828 }
829 if (valueNCEnumToName[i] != 0) {
830 v.ncEnumToName_offset = valueEnumToName_offset[i];
831 }
832 v.nameToEnum_offset = valueNameToEnum_offset[i];
833 }
834 }
835
836 void Builder::fixup() {
837 computeOffsets();
838 fixupStringPoolOffsets();
839 fixupNameGroupPoolOffsets();
840 fixupMiscellaneousOffsets();
841 }
842
843 int8_t* Builder::createData(int32_t& length) const {
844 length = total_size;
845 int8_t* result = MALLOC(int8_t, length);
846
847 int8_t* p = result;
848 int8_t* limit = result + length;
849
850 #define APPEND2(x, size) \
851 U_ASSERT((p+size)<=limit); \
852 uprv_memcpy(p, x, size); \
853 p += size
854
855 #define APPEND(x) APPEND2(x, x##_size)
856
857 APPEND2(&header, sizeof(header));
858 APPEND(enumToName);
859 APPEND(nameToEnum);
860 APPEND(enumToValue);
861 APPEND(valueMap);
862
863 for (int32_t i=0; i<valueMap_count; ++i) {
864 U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
865 (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
866 if (valueEnumToName[i] != 0) {
867 APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
868 }
869 if (valueNCEnumToName[i] != 0) {
870 APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
871 }
872 APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
873 }
874
875 APPEND(nameGroupPool);
876 APPEND(stringPool);
877
878 U_ASSERT(p == limit);
879 return result;
880 }
881
882 // END Builder
883 //----------------------------------------------------------------------
884
/* UDataInfo cf. udata.h */
// Describes the generated data file; genpname::writeDataFile() passes
// it to udata_create().
// NOTE(review): the meaning of the two literal 0 entries is defined by
// the UDataInfo declaration in udata.h -- confirm there.
static UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
    {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
    {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
};
899
900 // Glue for C<->C++
901 U_CAPI int compareAliasNames(const void* elem1, const void* elem2) {
902 return ((const AliasName*)elem1)->compare(*(const AliasName*)elem2);
903 }
904
905 class genpname {
906
907 // command-line options
908 UBool useCopyright;
909 UBool verbose;
910 int32_t debug;
911
912 public:
913 int MMain(int argc, char *argv[]);
914
915 private:
916 NameToEnumEntry* createNameIndex(const AliasList& list,
917 int32_t& nameIndexCount);
918
919 EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
920
921 int32_t writeDataFile(const char *destdir, const Builder&);
922 };
923
924 int main(int argc, char *argv[]) {
925 genpname app;
926 U_MAIN_INIT_ARGS(argc, argv);
927 return app.MMain(argc, argv);
928 }
929
// Command-line option table. genpname::MMain() addresses the entries by
// position, so the order here must stay in sync with the indices used
// there.
static UOption options[]={
    UOPTION_HELP_H,                                // [0]
    UOPTION_HELP_QUESTION_MARK,                    // [1]
    UOPTION_COPYRIGHT,                             // [2] -> useCopyright
    UOPTION_DESTDIR,                               // [3] output directory
    UOPTION_VERBOSE,                               // [4] -> verbose
    UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),  // [5] -> debug level
};
938
939 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
940 int32_t& nameIndexCount) {
941
942 // Build name => enum map
943
944 // This is an n->1 map. There are typically multiple names
945 // mapping to one enum. The name index is sorted in order of the name,
946 // as defined by the uprv_compareAliasNames() function.
947
948 int32_t i, j;
949 int32_t count = list.count();
950
951 // compute upper limit on number of names in the index
952 int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
953 NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
954
955 nameIndexCount = 0;
956 int32_t names[MAX_NAMES_PER_GROUP];
957 for (i=0; i<count; ++i) {
958 const Alias& p = list[i];
959 int32_t n = p.getUniqueNames(names);
960 for (j=0; j<n; ++j) {
961 U_ASSERT(nameIndexCount < nameIndexCapacity);
962 nameIndex[nameIndexCount++] =
963 NameToEnumEntry(names[j], p.enumValue);
964 }
965 }
966 qsort((void*) nameIndex, nameIndexCount, sizeof(nameIndex[0]),
967 compareNameToEnumEntry);
968 if (debug>1) {
969 printf("Alias names: %d\n", nameIndexCount);
970 for (i=0; i<nameIndexCount; ++i) {
971 printf("%s => %d\n",
972 STRING_TABLE[nameIndex[i].nameIndex].str,
973 nameIndex[i].enumValue);
974 }
975 printf("\n");
976 }
977 // make sure there are no duplicates. for a sorted list we need
978 // only compare adjacent items. Alias.getUniqueNames() has
979 // already eliminated duplicate names for a single property, which
980 // does occur, so we're checking for duplicate names between two
981 // properties, which should never occur.
982 UBool ok = TRUE;
983 for (i=1; i<nameIndexCount; ++i) {
984 if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
985 STRING_TABLE[nameIndex[i].nameIndex]) {
986 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
987 STRING_TABLE[nameIndex[i-1].nameIndex].str,
988 STRING_TABLE[nameIndex[i].nameIndex].str);
989 ok = FALSE;
990 }
991 }
992 if (!ok) {
993 die("Two or more duplicate names in property list");
994 }
995
996 return nameIndex;
997 }
998
999 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1000
1001 // Build the enum => name map
1002
1003 // This is a 1->n map. Each enum maps to 1 or more names. To
1004 // accomplish this the index entry points to an element of the
1005 // NAME_GROUP array. This is the short name (which may be empty).
1006 // From there, subsequent elements of NAME_GROUP are alternate
1007 // names for this enum, up to and including the first one that is
1008 // negative (negate for actual index).
1009
1010 int32_t i, j, k;
1011 int32_t count = list.count();
1012
1013 EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1014 for (i=0; i<count; ++i) {
1015 const Alias& p = list[i];
1016 enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1017 }
1018 qsort((void*) enumIndex, count, sizeof(enumIndex[0]),
1019 compareEnumToNameGroupEntry);
1020 if (debug>1) {
1021 printf("Property enums: %d\n", count);
1022 for (i=0; i<count; ++i) {
1023 printf("%d => %d: ",
1024 enumIndex[i].enumValue,
1025 enumIndex[i].nameGroupIndex);
1026 UBool done = FALSE;
1027 for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1028 k = NAME_GROUP[j];
1029 if (k < 0) {
1030 k = -k;
1031 done = TRUE;
1032 }
1033 printf("\"%s\"", STRING_TABLE[k].str);
1034 if (!done) printf(", ");
1035 }
1036 printf("\n");
1037 }
1038 printf("\n");
1039 }
1040 return enumIndex;
1041 }
1042
1043 int genpname::MMain(int argc, char* argv[]) {
1044
1045 int32_t i, j;
1046
1047 /* preset then read command line options */
1048 options[3].value=u_getDataDirectory();
1049 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1050
1051 /* error handling, printing usage message */
1052 if (argc<0) {
1053 fprintf(stderr,
1054 "error in command line argument \"%s\"\n",
1055 argv[-argc]);
1056 }
1057
1058 debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1059
1060 if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1061 debug < 0 || debug > 9) {
1062 fprintf(stderr,
1063 "usage: %s [-options]\n"
1064 "\tcreate " U_ICUDATA_NAME "_" PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1065 "options:\n"
1066 "\t-h or -? or --help this usage text\n"
1067 "\t-v or --verbose turn on verbose output\n"
1068 "\t-c or --copyright include a copyright notice\n"
1069 "\t-d or --destdir destination directory, followed by the path\n"
1070 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1071 argv[0]);
1072 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1073 }
1074
1075 /* get the options values */
1076 useCopyright=options[2].doesOccur;
1077 verbose = options[4].doesOccur;
1078
1079 // ------------------------------------------------------------
1080 // Sort the string table. This produces the proper sorting
1081 // using the actual comparison function we will use.
1082 qsort((void*) STRING_TABLE, STRING_COUNT, sizeof(STRING_TABLE[0]),
1083 compareAliasNames);
1084 if (debug>1) {
1085 printf("String pool: %d\n", STRING_COUNT);
1086 }
1087 for (i=0; i<STRING_COUNT; ++i) {
1088 REMAP[STRING_TABLE[i].index] = i;
1089 if (debug>1) {
1090 if (i != 0) printf(", ");
1091 printf("%s (%d)", STRING_TABLE[i].str, STRING_TABLE[i].index);
1092 }
1093 }
1094 if (debug>1) {
1095 printf("\n\n");
1096 }
1097
1098 // ------------------------------------------------------------
1099 // Fixup the NAME_GROUP indices so they match the sorted order
1100 for (i=0; i<NAME_GROUP_COUNT; ++i) {
1101 // keep negative entries (end markers) negative
1102 if (NAME_GROUP[i] < 0) {
1103 NAME_GROUP[i] = -REMAP[-NAME_GROUP[i]];
1104 } else {
1105 NAME_GROUP[i] = REMAP[NAME_GROUP[i]];
1106 }
1107 }
1108
1109 // ------------------------------------------------------------
1110 // Create top-level property indices
1111
1112 PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1113 int32_t propNameCount;
1114 NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1115 EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1116
1117 // ------------------------------------------------------------
1118 // Create indices for the value list for each enumerated property
1119
1120 // This will have more entries than we need...
1121 EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1122 int32_t enumToValue_count = 0;
1123 for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1124 if (PROPERTY[i].valueCount == 0) continue;
1125 AliasArrayList values(PROPERTY[i].valueList,
1126 PROPERTY[i].valueCount);
1127 enumToValue[j].enumValue = PROPERTY[i].enumValue;
1128 enumToValue[j].enumToName = createEnumIndex(values);
1129 enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1130 enumToValue[j].nameToEnum = createNameIndex(values,
1131 enumToValue[j].nameToEnum_count);
1132 ++j;
1133 }
1134 enumToValue_count = j;
1135 qsort((void*) enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1136 compareEnumToValueEntry);
1137
1138 // ------------------------------------------------------------
1139 // Build PropertyAliases layout in memory
1140
1141 Builder builder(debug);
1142
1143 builder.buildTopLevelProperties(propName,
1144 propNameCount,
1145 propEnum,
1146 PROPERTY_COUNT);
1147
1148 builder.buildValues(enumToValue,
1149 enumToValue_count);
1150
1151 builder.buildStringPool(STRING_TABLE,
1152 STRING_COUNT,
1153 NAME_GROUP,
1154 NAME_GROUP_COUNT);
1155
1156 builder.fixup();
1157
1158 ////////////////////////////////////////////////////////////
1159 // Write the output file
1160 ////////////////////////////////////////////////////////////
1161 int32_t wlen = writeDataFile(options[3].value, builder);
1162 if (verbose) {
1163 fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1164 U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1165 }
1166
1167 return 0; // success
1168 }
1169
1170 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1171 int32_t length;
1172 int8_t* data = builder.createData(length);
1173
1174 UNewDataMemory *pdata;
1175 UErrorCode status = U_ZERO_ERROR;
1176
1177 pdata = udata_create(destdir, PNAME_DATA_TYPE, U_ICUDATA_NAME "_" PNAME_DATA_NAME, &dataInfo,
1178 useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1179 if (U_FAILURE(status)) {
1180 die("Unable to create data memory");
1181 }
1182
1183 udata_writeBlock(pdata, data, length);
1184
1185 int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1186 if (U_FAILURE(status)) {
1187 die("Error writing output file");
1188 }
1189 if (dataLength != length) {
1190 die("Written file doesn't match expected size");
1191 }
1192
1193 return dataLength;
1194 }
1195
1196 //eof