]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gencolusb/extract_unsafe_backwards.cpp
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / tools / gencolusb / extract_unsafe_backwards.cpp
1 /**
2 * Copyright (c) 1999-2016, International Business Machines Corporation and
3 * others. All Rights Reserved.
4 *
5 * Generator for source/i18n/collunsafe.h
6 * see Makefile
7 */
8
9 #include <stdio.h>
10 #include "unicode/uversion.h"
11 #include "unicode/uniset.h"
12 #include "collationroot.h"
13 #include "collationtailoring.h"
14
15 /**
16 * Define the type of generator to use. Choose one: set exactly one of the
 * three macros below to 1 and leave the other two at 0.
17 */
18 #define SERIALIZE 1 ///< Default: use UnicodeSet.serialize() and a new internal c'tor
19 #define RANGES 0 ///< Enumerate ranges (works, but not as fast. No support in collationdatareader.cpp)
20 #define PATTERN 0 ///< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)
21
22 int main(int argc, const char *argv[]) {
23 UErrorCode errorCode = U_ZERO_ERROR;
24
25 // Get the unsafeBackwardsSet
26 const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
27 if(U_FAILURE(errorCode)) {
28 fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
29 return 1;
30 }
31 const UVersionInfo &version = rootEntry->tailoring->version;
32 const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
33 char verString[20];
34 u_versionToString(version, verString);
35 fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
36 int32_t rangeCount = unsafeBackwardSet->getRangeCount();
37
38 #if SERIALIZE
39 fprintf(stderr, ".. serializing\n");
40 // UnicodeSet serialization
41
42 UErrorCode preflightCode = U_ZERO_ERROR;
43 // preflight
44 int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode);
45 if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
46 fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
47 return 1;
48 }
49 uint16_t *serializedData = new uint16_t[serializedCount];
50 // serialize
51 unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
52 if(U_FAILURE(errorCode)) {
53 delete [] serializedData;
54 fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
55 return 1;
56 }
57 #endif
58
59 #if PATTERN
60 fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
61 // attempt to use pattern
62
63 UnicodeString pattern;
64 UnicodeSet set(*unsafeBackwardSet);
65 set.compact();
66 set.toPattern(pattern, FALSE);
67
68 if(U_SUCCESS(errorCode)) {
69 // This fails (bug# ?) - which is why this method was abandoned.
70
71 // UnicodeSet usA(pattern, errorCode);
72 // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
73 // return 1;
74 }
75
76
77 const UChar *buf = pattern.getBuffer();
78 int32_t needed = pattern.length();
79
80 // print
81 {
82 char buf2[2048];
83 int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
84 buf2[len2]=0;
85 fprintf(stderr,"===\n%s\n===\n", buf2);
86 }
87
88 const UnicodeString unsafeBackwardPattern(FALSE, buf, needed);
89 if(U_SUCCESS(errorCode)) {
90 //UnicodeSet us(unsafeBackwardPattern, errorCode);
91 // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
92 } else {
93 fprintf(stderr, "Uset OK - \n");
94 }
95 #endif
96
97
98 // Generate the output file.
99
100 printf("// collunsafe.h\n");
101 printf("// %s\n", U_COPYRIGHT_STRING);
102 printf("\n");
103 printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
104 printf("// Machine generated, do not edit.\n");
105 printf("\n");
106 printf("#ifndef COLLUNSAFE_H\n"
107 "#define COLLUNSAFE_H\n"
108 "\n"
109 "#include \"unicode/utypes.h\"\n"
110 "\n"
111 "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
112 printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
113
114
115
116 #if PATTERN
117 printf("#define COLLUNSAFE_PATTERN 1\n");
118 printf("static const int32_t collunsafe_len = %d;\n", needed);
119 printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
120 for(int i=0;i<needed;i++) {
121 if( (i>0) && (i%8 == 0) ) {
122 printf(" // %d\n", i);
123 }
124 printf("0x%04X", buf[i]); // TODO check
125 if(i != (needed-1)) {
126 printf(", ");
127 }
128 }
129 printf(" //%d\n};\n", (needed-1));
130 #endif
131
132 #if RANGE
133 fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
134 printf("#define COLLUNSAFE_RANGE 1\n");
135 printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
136 printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
137 for(int32_t i=0;i<rangeCount;i++) {
138 printf(" 0x%04X, 0x%04X, // %d\n",
139 unsafeBackwardSet->getRangeStart(i),
140 unsafeBackwardSet->getRangeEnd(i),
141 i);
142 }
143 printf("};\n");
144 #endif
145
146 #if SERIALIZE
147 printf("#define COLLUNSAFE_SERIALIZE 1\n");
148 printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
149 printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
150 for(int32_t i=0;i<serializedCount;i++) {
151 if( (i>0) && (i%8 == 0) ) {
152 printf(" // %d\n", i);
153 }
154 printf("0x%04X", serializedData[i]); // TODO check
155 if(i != (serializedCount-1)) {
156 printf(", ");
157 }
158 }
159 printf("};\n");
160 #endif
161
162 printf("#endif\n");
163 fflush(stderr);
164 fflush(stdout);
165 return(U_SUCCESS(errorCode)?0:1);
166 }