git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gencolusb/extract_unsafe_backwards.cpp
   2  * Copyright (c) 1999-2016, International Business Machines Corporation and 
   3  * others. All Rights Reserved. 
   5  * Generator for source/i18n/collunsafe.h 
  10 #include "unicode/uversion.h" 
  11 #include "unicode/uniset.h" 
  12 #include "collationroot.h" 
  13 #include "collationtailoring.h" 
  16  * Define the type of generator to use. Choose one. 
  18 #define SERIALIZE 1   //< Default: use UnicodeSet.serialize() and a new internal c'tor 
  19 #define RANGES 0      //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp) 
  20 #define PATTERN 0     //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp) 
  22 int main(int argc
, const char *argv
[]) { 
  23     UErrorCode errorCode 
= U_ZERO_ERROR
; 
  25     // Get the unsafeBackwardsSet 
  26     const CollationCacheEntry 
*rootEntry 
= CollationRoot::getRootCacheEntry(errorCode
); 
  27     if(U_FAILURE(errorCode
)) { 
  28       fprintf(stderr
, "Err: %s getting root cache entry\n", u_errorName(errorCode
)); 
  31     const UVersionInfo 
&version 
= rootEntry
->tailoring
->version
; 
  32     const UnicodeSet 
*unsafeBackwardSet 
= rootEntry
->tailoring
->unsafeBackwardSet
; 
  34     u_versionToString(version
, verString
); 
  35     fprintf(stderr
, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION
, verString
); 
  36     int32_t rangeCount 
= unsafeBackwardSet
->getRangeCount(); 
  39     fprintf(stderr
, ".. serializing\n"); 
  40     // UnicodeSet serialization 
  42     UErrorCode preflightCode 
= U_ZERO_ERROR
; 
  44     int32_t serializedCount 
= unsafeBackwardSet
->serialize(NULL
,0,preflightCode
); 
  45     if(U_FAILURE(preflightCode
) && preflightCode 
!= U_BUFFER_OVERFLOW_ERROR
) { 
  46       fprintf(stderr
, "Err: %s preflighting unicode set\n", u_errorName(preflightCode
)); 
  49     uint16_t *serializedData 
= new uint16_t[serializedCount
]; 
  51     unsafeBackwardSet
->serialize(serializedData
, serializedCount
, errorCode
); 
  52     if(U_FAILURE(errorCode
)) { 
  53       delete [] serializedData
; 
  54       fprintf(stderr
, "Err: %s serializing unicodeset\n", u_errorName(errorCode
)); 
  60     fprintf(stderr
,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n"); 
  61     // attempt to use pattern 
  63     UnicodeString pattern
; 
  64     UnicodeSet 
set(*unsafeBackwardSet
); 
  66     set
.toPattern(pattern
, FALSE
); 
  68     if(U_SUCCESS(errorCode
)) { 
  69       // This fails (bug# ?) - which is why this method was abandoned. 
  71       // UnicodeSet usA(pattern, errorCode); 
  72       // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode)); 
  77     const UChar 
*buf 
= pattern
.getBuffer(); 
  78     int32_t needed 
= pattern
.length(); 
  83       int32_t len2 
= pattern
.extract(0, pattern
.length(), buf2
, "utf-8"); 
  85       fprintf(stderr
,"===\n%s\n===\n", buf2
); 
  88     const UnicodeString 
unsafeBackwardPattern(FALSE
, buf
, needed
); 
  89   if(U_SUCCESS(errorCode
)) { 
  90     //UnicodeSet us(unsafeBackwardPattern, errorCode); 
  91     //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode)); 
  93     fprintf(stderr
, "Uset OK - \n"); 
  98   // Generate the output file. 
 100   printf("// collunsafe.h\n"); 
 101   printf("// %s\n", U_COPYRIGHT_STRING
); 
 103   printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n"); 
 104   printf("// Machine generated, do not edit.\n"); 
 106   printf("#ifndef COLLUNSAFE_H\n" 
 107          "#define COLLUNSAFE_H\n" 
 109          "#include \"unicode/utypes.h\"\n" 
 111          "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION 
"\"\n"); 
 112   printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString
); 
 117   printf("#define COLLUNSAFE_PATTERN 1\n"); 
 118   printf("static const int32_t collunsafe_len = %d;\n", needed
); 
 119   printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n"); 
 120   for(int i
=0;i
<needed
;i
++) { 
 121     if( (i
>0) && (i%8 
== 0) ) { 
 122       printf(" // %d\n", i
); 
 124     printf("0x%04X", buf
[i
]); // TODO check 
 125     if(i 
!= (needed
-1)) { 
 129   printf(" //%d\n};\n", (needed
-1)); 
 133     fprintf(stderr
, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n"); 
 134     printf("#define COLLUNSAFE_RANGE 1\n"); 
 135     printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount
); 
 136     printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount
*2); 
 137     for(int32_t i
=0;i
<rangeCount
;i
++) { 
 138       printf(" 0x%04X, 0x%04X, // %d\n", 
 139              unsafeBackwardSet
->getRangeStart(i
), 
 140              unsafeBackwardSet
->getRangeEnd(i
), 
 147     printf("#define COLLUNSAFE_SERIALIZE 1\n");     
 148     printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount
); 
 149     printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount
); 
 150     for(int32_t i
=0;i
<serializedCount
;i
++) { 
 151       if( (i
>0) && (i%8 
== 0) ) { 
 152         printf(" // %d\n", i
); 
 154       printf("0x%04X", serializedData
[i
]); // TODO check 
 155       if(i 
!= (serializedCount
-1)) { 
 165     return(U_SUCCESS(errorCode
)?0:1);