/*
 * git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gencolusb/extract_unsafe_backwards.cpp
 *
 * Copyright (c) 1999-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *
 * Generator for source/i18n/collunsafe.h
 */
10 #include "unicode/uversion.h"
11 #include "unicode/uniset.h"
12 #include "collationroot.h"
13 #include "collationtailoring.h"
/**
 * Define the type of generator to use. Choose one.
 */
#define SERIALIZE 1 //< Default: use UnicodeSet.serialize() and a new internal c'tor
#define RANGES 0 //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp)
#define PATTERN 0 //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)
22 int main(int argc
, const char *argv
[]) {
23 UErrorCode errorCode
= U_ZERO_ERROR
;
25 // Get the unsafeBackwardsSet
26 const CollationCacheEntry
*rootEntry
= CollationRoot::getRootCacheEntry(errorCode
);
27 if(U_FAILURE(errorCode
)) {
28 fprintf(stderr
, "Err: %s getting root cache entry\n", u_errorName(errorCode
));
31 const UVersionInfo
&version
= rootEntry
->tailoring
->version
;
32 const UnicodeSet
*unsafeBackwardSet
= rootEntry
->tailoring
->unsafeBackwardSet
;
34 u_versionToString(version
, verString
);
35 fprintf(stderr
, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION
, verString
);
36 int32_t rangeCount
= unsafeBackwardSet
->getRangeCount();
39 fprintf(stderr
, ".. serializing\n");
40 // UnicodeSet serialization
42 UErrorCode preflightCode
= U_ZERO_ERROR
;
44 int32_t serializedCount
= unsafeBackwardSet
->serialize(NULL
,0,preflightCode
);
45 if(U_FAILURE(preflightCode
) && preflightCode
!= U_BUFFER_OVERFLOW_ERROR
) {
46 fprintf(stderr
, "Err: %s preflighting unicode set\n", u_errorName(preflightCode
));
49 uint16_t *serializedData
= new uint16_t[serializedCount
];
51 unsafeBackwardSet
->serialize(serializedData
, serializedCount
, errorCode
);
52 if(U_FAILURE(errorCode
)) {
53 delete [] serializedData
;
54 fprintf(stderr
, "Err: %s serializing unicodeset\n", u_errorName(errorCode
));
60 fprintf(stderr
,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
61 // attempt to use pattern
63 UnicodeString pattern
;
64 UnicodeSet
set(*unsafeBackwardSet
);
66 set
.toPattern(pattern
, FALSE
);
68 if(U_SUCCESS(errorCode
)) {
69 // This fails (bug# ?) - which is why this method was abandoned.
71 // UnicodeSet usA(pattern, errorCode);
72 // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
77 const UChar
*buf
= pattern
.getBuffer();
78 int32_t needed
= pattern
.length();
83 int32_t len2
= pattern
.extract(0, pattern
.length(), buf2
, "utf-8");
85 fprintf(stderr
,"===\n%s\n===\n", buf2
);
88 const UnicodeString
unsafeBackwardPattern(FALSE
, buf
, needed
);
89 if(U_SUCCESS(errorCode
)) {
90 //UnicodeSet us(unsafeBackwardPattern, errorCode);
91 // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
93 fprintf(stderr
, "Uset OK - \n");
98 // Generate the output file.
100 printf("// collunsafe.h\n");
101 printf("// %s\n", U_COPYRIGHT_STRING
);
103 printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
104 printf("// Machine generated, do not edit.\n");
106 printf("#ifndef COLLUNSAFE_H\n"
107 "#define COLLUNSAFE_H\n"
109 "#include \"unicode/utypes.h\"\n"
111 "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION
"\"\n");
112 printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString
);
117 printf("#define COLLUNSAFE_PATTERN 1\n");
118 printf("static const int32_t collunsafe_len = %d;\n", needed
);
119 printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
120 for(int i
=0;i
<needed
;i
++) {
121 if( (i
>0) && (i%8
== 0) ) {
122 printf(" // %d\n", i
);
124 printf("0x%04X", buf
[i
]); // TODO check
125 if(i
!= (needed
-1)) {
129 printf(" //%d\n};\n", (needed
-1));
133 fprintf(stderr
, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
134 printf("#define COLLUNSAFE_RANGE 1\n");
135 printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount
);
136 printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount
*2);
137 for(int32_t i
=0;i
<rangeCount
;i
++) {
138 printf(" 0x%04X, 0x%04X, // %d\n",
139 unsafeBackwardSet
->getRangeStart(i
),
140 unsafeBackwardSet
->getRangeEnd(i
),
147 printf("#define COLLUNSAFE_SERIALIZE 1\n");
148 printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount
);
149 printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount
);
150 for(int32_t i
=0;i
<serializedCount
;i
++) {
151 if( (i
>0) && (i%8
== 0) ) {
152 printf(" // %d\n", i
);
154 printf("0x%04X", serializedData
[i
]); // TODO check
155 if(i
!= (serializedCount
-1)) {
165 return(U_SUCCESS(errorCode
)?0:1);