]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
2ca993e8 A |
3 | /** |
4 | * Copyright (c) 1999-2016, International Business Machines Corporation and | |
5 | * others. All Rights Reserved. | |
6 | * | |
7 | * Generator for source/i18n/collunsafe.h | |
8 | * see Makefile | |
9 | */ | |
10 | ||
11 | #include <stdio.h> | |
12 | #include "unicode/uversion.h" | |
13 | #include "unicode/uniset.h" | |
14 | #include "collationroot.h" | |
15 | #include "collationtailoring.h" | |
16 | ||
17 | /** | |
18 | * Compile-time selection of the output form this generator emits. Choose exactly one: set it to 1 and leave the others at 0. | |
19 | */ | |
20 | #define SERIALIZE 1 ///< Default: emit the set as UnicodeSet::serialize() data (read back through a new internal c'tor) |
21 | #define RANGES 0 ///< Emit the set as enumerated [start, end] ranges (works, but not as fast; no support in collationdatareader.cpp) |
22 | #define PATTERN 0 ///< Emit the set as a UnicodeSet pattern string (depends on #11891 and is probably slower; no support in collationdatareader.cpp) |
23 | ||
24 | int main(int argc, const char *argv[]) { | |
25 | UErrorCode errorCode = U_ZERO_ERROR; | |
26 | ||
27 | // Get the unsafeBackwardsSet | |
28 | const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode); | |
29 | if(U_FAILURE(errorCode)) { | |
30 | fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode)); | |
31 | return 1; | |
32 | } | |
33 | const UVersionInfo &version = rootEntry->tailoring->version; | |
34 | const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet; | |
35 | char verString[20]; | |
36 | u_versionToString(version, verString); | |
37 | fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString); | |
38 | int32_t rangeCount = unsafeBackwardSet->getRangeCount(); | |
39 | ||
40 | #if SERIALIZE | |
41 | fprintf(stderr, ".. serializing\n"); | |
42 | // UnicodeSet serialization | |
43 | ||
44 | UErrorCode preflightCode = U_ZERO_ERROR; | |
45 | // preflight | |
46 | int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode); | |
47 | if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) { | |
48 | fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode)); | |
49 | return 1; | |
50 | } | |
51 | uint16_t *serializedData = new uint16_t[serializedCount]; | |
52 | // serialize | |
53 | unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode); | |
54 | if(U_FAILURE(errorCode)) { | |
55 | delete [] serializedData; | |
56 | fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode)); | |
57 | return 1; | |
58 | } | |
59 | #endif | |
60 | ||
61 | #if PATTERN | |
62 | fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n"); | |
63 | // attempt to use pattern | |
64 | ||
65 | UnicodeString pattern; | |
66 | UnicodeSet set(*unsafeBackwardSet); | |
67 | set.compact(); | |
68 | set.toPattern(pattern, FALSE); | |
69 | ||
70 | if(U_SUCCESS(errorCode)) { | |
71 | // This fails (bug# ?) - which is why this method was abandoned. | |
72 | ||
73 | // UnicodeSet usA(pattern, errorCode); | |
74 | // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode)); | |
75 | // return 1; | |
76 | } | |
77 | ||
78 | ||
79 | const UChar *buf = pattern.getBuffer(); | |
80 | int32_t needed = pattern.length(); | |
81 | ||
82 | ||
83 | { | |
84 | char buf2[2048]; | |
85 | int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8"); | |
86 | buf2[len2]=0; | |
87 | fprintf(stderr,"===\n%s\n===\n", buf2); | |
88 | } | |
89 | ||
90 | const UnicodeString unsafeBackwardPattern(FALSE, buf, needed); | |
91 | if(U_SUCCESS(errorCode)) { | |
92 | //UnicodeSet us(unsafeBackwardPattern, errorCode); | |
93 | // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode)); | |
94 | } else { | |
95 | fprintf(stderr, "Uset OK - \n"); | |
96 | } | |
97 | #endif | |
98 | ||
99 | ||
100 | // Generate the output file. | |
101 | ||
102 | printf("// collunsafe.h\n"); | |
103 | printf("// %s\n", U_COPYRIGHT_STRING); | |
104 | printf("\n"); | |
105 | printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n"); | |
106 | printf("// Machine generated, do not edit.\n"); | |
107 | printf("\n"); | |
108 | printf("#ifndef COLLUNSAFE_H\n" | |
109 | "#define COLLUNSAFE_H\n" | |
110 | "\n" | |
111 | "#include \"unicode/utypes.h\"\n" | |
112 | "\n" | |
113 | "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n"); | |
114 | printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString); | |
115 | ||
116 | ||
117 | ||
118 | #if PATTERN | |
119 | printf("#define COLLUNSAFE_PATTERN 1\n"); | |
120 | printf("static const int32_t collunsafe_len = %d;\n", needed); | |
121 | printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n"); | |
122 | for(int i=0;i<needed;i++) { | |
123 | if( (i>0) && (i%8 == 0) ) { | |
124 | printf(" // %d\n", i); | |
125 | } | |
126 | printf("0x%04X", buf[i]); // TODO check | |
127 | if(i != (needed-1)) { | |
128 | printf(", "); | |
129 | } | |
130 | } | |
131 | printf(" //%d\n};\n", (needed-1)); | |
132 | #endif | |
133 | ||
134 | #if RANGE | |
135 | fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n"); | |
136 | printf("#define COLLUNSAFE_RANGE 1\n"); | |
137 | printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount); | |
138 | printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2); | |
139 | for(int32_t i=0;i<rangeCount;i++) { | |
140 | printf(" 0x%04X, 0x%04X, // %d\n", | |
141 | unsafeBackwardSet->getRangeStart(i), | |
142 | unsafeBackwardSet->getRangeEnd(i), | |
143 | i); | |
144 | } | |
145 | printf("};\n"); | |
146 | #endif | |
147 | ||
148 | #if SERIALIZE | |
149 | printf("#define COLLUNSAFE_SERIALIZE 1\n"); | |
150 | printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount); | |
151 | printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount); | |
152 | for(int32_t i=0;i<serializedCount;i++) { | |
153 | if( (i>0) && (i%8 == 0) ) { | |
154 | printf(" // %d\n", i); | |
155 | } | |
156 | printf("0x%04X", serializedData[i]); // TODO check | |
157 | if(i != (serializedCount-1)) { | |
158 | printf(", "); | |
159 | } | |
160 | } | |
161 | printf("};\n"); | |
162 | #endif | |
163 | ||
164 | printf("#endif\n"); | |
165 | fflush(stderr); | |
166 | fflush(stdout); | |
167 | return(U_SUCCESS(errorCode)?0:1); | |
168 | } |