]>
Commit | Line | Data |
---|---|---|
2ca993e8 A |
1 | /** |
2 | * Copyright (c) 1999-2016, International Business Machines Corporation and | |
3 | * others. All Rights Reserved. | |
4 | * | |
5 | * Generator for source/i18n/collunsafe.h | |
6 | * see Makefile | |
7 | */ | |
8 | ||
9 | #include <stdio.h> | |
10 | #include "unicode/uversion.h" | |
11 | #include "unicode/uniset.h" | |
12 | #include "collationroot.h" | |
13 | #include "collationtailoring.h" | |
14 | ||
/**
 * Define the type of generator to use.
 * Choose exactly one: set that macro to 1 and the other two to 0.
 * main() compiles in only the code paths whose macro is non-zero.
 */
#define SERIALIZE 1 ///< Default: use UnicodeSet.serialize() and a new internal c'tor
#define RANGES 0    ///< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp)
#define PATTERN 0   ///< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)

/* Turn the "choose one" rule into a build-time check instead of a comment. */
#if (SERIALIZE + RANGES + PATTERN) != 1
#error "gencolusb: enable exactly one of SERIALIZE, RANGES, PATTERN"
#endif
21 | ||
22 | int main(int argc, const char *argv[]) { | |
23 | UErrorCode errorCode = U_ZERO_ERROR; | |
24 | ||
25 | // Get the unsafeBackwardsSet | |
26 | const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode); | |
27 | if(U_FAILURE(errorCode)) { | |
28 | fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode)); | |
29 | return 1; | |
30 | } | |
31 | const UVersionInfo &version = rootEntry->tailoring->version; | |
32 | const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet; | |
33 | char verString[20]; | |
34 | u_versionToString(version, verString); | |
35 | fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString); | |
36 | int32_t rangeCount = unsafeBackwardSet->getRangeCount(); | |
37 | ||
38 | #if SERIALIZE | |
39 | fprintf(stderr, ".. serializing\n"); | |
40 | // UnicodeSet serialization | |
41 | ||
42 | UErrorCode preflightCode = U_ZERO_ERROR; | |
43 | // preflight | |
44 | int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode); | |
45 | if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) { | |
46 | fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode)); | |
47 | return 1; | |
48 | } | |
49 | uint16_t *serializedData = new uint16_t[serializedCount]; | |
50 | // serialize | |
51 | unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode); | |
52 | if(U_FAILURE(errorCode)) { | |
53 | delete [] serializedData; | |
54 | fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode)); | |
55 | return 1; | |
56 | } | |
57 | #endif | |
58 | ||
59 | #if PATTERN | |
60 | fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n"); | |
61 | // attempt to use pattern | |
62 | ||
63 | UnicodeString pattern; | |
64 | UnicodeSet set(*unsafeBackwardSet); | |
65 | set.compact(); | |
66 | set.toPattern(pattern, FALSE); | |
67 | ||
68 | if(U_SUCCESS(errorCode)) { | |
69 | // This fails (bug# ?) - which is why this method was abandoned. | |
70 | ||
71 | // UnicodeSet usA(pattern, errorCode); | |
72 | // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode)); | |
73 | // return 1; | |
74 | } | |
75 | ||
76 | ||
77 | const UChar *buf = pattern.getBuffer(); | |
78 | int32_t needed = pattern.length(); | |
79 | ||
80 | ||
81 | { | |
82 | char buf2[2048]; | |
83 | int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8"); | |
84 | buf2[len2]=0; | |
85 | fprintf(stderr,"===\n%s\n===\n", buf2); | |
86 | } | |
87 | ||
88 | const UnicodeString unsafeBackwardPattern(FALSE, buf, needed); | |
89 | if(U_SUCCESS(errorCode)) { | |
90 | //UnicodeSet us(unsafeBackwardPattern, errorCode); | |
91 | // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode)); | |
92 | } else { | |
93 | fprintf(stderr, "Uset OK - \n"); | |
94 | } | |
95 | #endif | |
96 | ||
97 | ||
98 | // Generate the output file. | |
99 | ||
100 | printf("// collunsafe.h\n"); | |
101 | printf("// %s\n", U_COPYRIGHT_STRING); | |
102 | printf("\n"); | |
103 | printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n"); | |
104 | printf("// Machine generated, do not edit.\n"); | |
105 | printf("\n"); | |
106 | printf("#ifndef COLLUNSAFE_H\n" | |
107 | "#define COLLUNSAFE_H\n" | |
108 | "\n" | |
109 | "#include \"unicode/utypes.h\"\n" | |
110 | "\n" | |
111 | "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n"); | |
112 | printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString); | |
113 | ||
114 | ||
115 | ||
116 | #if PATTERN | |
117 | printf("#define COLLUNSAFE_PATTERN 1\n"); | |
118 | printf("static const int32_t collunsafe_len = %d;\n", needed); | |
119 | printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n"); | |
120 | for(int i=0;i<needed;i++) { | |
121 | if( (i>0) && (i%8 == 0) ) { | |
122 | printf(" // %d\n", i); | |
123 | } | |
124 | printf("0x%04X", buf[i]); // TODO check | |
125 | if(i != (needed-1)) { | |
126 | printf(", "); | |
127 | } | |
128 | } | |
129 | printf(" //%d\n};\n", (needed-1)); | |
130 | #endif | |
131 | ||
132 | #if RANGE | |
133 | fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n"); | |
134 | printf("#define COLLUNSAFE_RANGE 1\n"); | |
135 | printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount); | |
136 | printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2); | |
137 | for(int32_t i=0;i<rangeCount;i++) { | |
138 | printf(" 0x%04X, 0x%04X, // %d\n", | |
139 | unsafeBackwardSet->getRangeStart(i), | |
140 | unsafeBackwardSet->getRangeEnd(i), | |
141 | i); | |
142 | } | |
143 | printf("};\n"); | |
144 | #endif | |
145 | ||
146 | #if SERIALIZE | |
147 | printf("#define COLLUNSAFE_SERIALIZE 1\n"); | |
148 | printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount); | |
149 | printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount); | |
150 | for(int32_t i=0;i<serializedCount;i++) { | |
151 | if( (i>0) && (i%8 == 0) ) { | |
152 | printf(" // %d\n", i); | |
153 | } | |
154 | printf("0x%04X", serializedData[i]); // TODO check | |
155 | if(i != (serializedCount-1)) { | |
156 | printf(", "); | |
157 | } | |
158 | } | |
159 | printf("};\n"); | |
160 | #endif | |
161 | ||
162 | printf("#endif\n"); | |
163 | fflush(stderr); | |
164 | fflush(stdout); | |
165 | return(U_SUCCESS(errorCode)?0:1); | |
166 | } |