// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2008-2011, International Business Machines
*   Corporation, Google and others.  All Rights Reserved.
*
*******************************************************************************
*/
// Author : eldawy@google.com (Mohamed Eldawy)
//
// Purpose: To generate a list of encodings capable of handling
// a given Unicode text
//
// Started 09-April-2008
/**
 * This is an implementation of an encoding selector.
 * The goal is, given a Unicode string, to find the encodings
 * this string can be mapped to. To make processing faster,
 * a trie is built when you call ucnvsel_open() that
 * stores all encodings a code point can map to.
 */
  29 #include "unicode/ucnvsel.h" 
  31 #if !UCONFIG_NO_CONVERSION 
  35 #include "unicode/uchar.h" 
  36 #include "unicode/uniset.h" 
  37 #include "unicode/ucnv.h" 
  38 #include "unicode/ustring.h" 
  39 #include "unicode/uchriter.h" 
  51 struct UConverterSelector 
{ 
  52   UTrie2 
*trie
;              // 16 bit trie containing offsets into pv 
  53   uint32_t* pv
;              // table of bits! 
  55   char** encodings
;          // which encodings did user ask to use? 
  56   int32_t encodingsCount
; 
  57   int32_t encodingStrLength
; 
  59   UBool ownPv
, ownEncodingStrings
; 
  62 static void generateSelectorData(UConverterSelector
* result
, 
  64                                  const USet
* excludedCodePoints
, 
  65                                  const UConverterUnicodeSet whichSet
, 
  67   if (U_FAILURE(*status
)) { 
  71   int32_t columns 
= (result
->encodingsCount
+31)/32; 
  73   // set errorValue to all-ones 
  74   for (int32_t col 
= 0; col 
< columns
; col
++) { 
  75     upvec_setValue(upvec
, UPVEC_ERROR_VALUE_CP
, UPVEC_ERROR_VALUE_CP
, 
  76                    col
, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status
); 
  79   for (int32_t i 
= 0; i 
< result
->encodingsCount
; ++i
) { 
  84     UConverter
* test_converter 
= ucnv_open(result
->encodings
[i
], status
); 
  85     if (U_FAILURE(*status
)) { 
  88     USet
* unicode_point_set
; 
  89     unicode_point_set 
= uset_open(1, 0);  // empty set 
  91     ucnv_getUnicodeSet(test_converter
, unicode_point_set
, 
  93     if (U_FAILURE(*status
)) { 
  94       ucnv_close(test_converter
); 
 100     // now iterate over intervals on set i! 
 101     item_count 
= uset_getItemCount(unicode_point_set
); 
 103     for (j 
= 0; j 
< item_count
; ++j
) { 
 106       UErrorCode smallStatus 
= U_ZERO_ERROR
; 
 107       uset_getItem(unicode_point_set
, j
, &start_char
, &end_char
, NULL
, 0, 
 109       if (U_FAILURE(smallStatus
)) { 
 110         // this will be reached for the converters that fill the set with 
 111         // strings. Those should be ignored by our system 
 113         upvec_setValue(upvec
, start_char
, end_char
, column
, static_cast<uint32_t>(~0), mask
, 
 117     ucnv_close(test_converter
); 
 118     uset_close(unicode_point_set
); 
 119     if (U_FAILURE(*status
)) { 
 124   // handle excluded encodings! Simply set their values to all 1's in the upvec 
 125   if (excludedCodePoints
) { 
 126     int32_t item_count 
= uset_getItemCount(excludedCodePoints
); 
 127     for (int32_t j 
= 0; j 
< item_count
; ++j
) { 
 131       uset_getItem(excludedCodePoints
, j
, &start_char
, &end_char
, NULL
, 0, 
 133       for (int32_t col 
= 0; col 
< columns
; col
++) { 
 134         upvec_setValue(upvec
, start_char
, end_char
, col
, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), 
 140   // alright. Now, let's put things in the same exact form you'd get when you 
 141   // unserialize things. 
 142   result
->trie 
= upvec_compactToUTrie2WithRowIndexes(upvec
, status
); 
 143   result
->pv 
= upvec_cloneArray(upvec
, &result
->pvCount
, NULL
, status
); 
 144   result
->pvCount 
*= columns
;  // number of uint32_t = rows * columns 
 145   result
->ownPv 
= TRUE
; 
 148 /* open a selector. If converterListSize is 0, build for all converters. 
 149    If excludedCodePoints is NULL, don't exclude any codepoints */ 
 150 U_CAPI UConverterSelector
* U_EXPORT2
 
 151 ucnvsel_open(const char* const*  converterList
, int32_t converterListSize
, 
 152              const USet
* excludedCodePoints
, 
 153              const UConverterUnicodeSet whichSet
, UErrorCode
* status
) { 
 154   // check if already failed 
 155   if (U_FAILURE(*status
)) { 
 158   // ensure args make sense! 
 159   if (converterListSize 
< 0 || (converterList 
== NULL 
&& converterListSize 
!= 0)) { 
 160     *status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 164   // allocate a new converter 
 165   LocalUConverterSelectorPointer 
newSelector( 
 166     (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
))); 
 167   if (newSelector
.isNull()) { 
 168     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 171   uprv_memset(newSelector
.getAlias(), 0, sizeof(UConverterSelector
)); 
 173   if (converterListSize 
== 0) { 
 174     converterList 
= NULL
; 
 175     converterListSize 
= ucnv_countAvailable(); 
 177   newSelector
->encodings 
= 
 178     (char**)uprv_malloc(converterListSize 
* sizeof(char*)); 
 179   if (!newSelector
->encodings
) { 
 180     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 183   newSelector
->encodings
[0] = NULL
;  // now we can call ucnvsel_close() 
 185   // make a backup copy of the list of converters 
 186   int32_t totalSize 
= 0; 
 188   for (i 
= 0; i 
< converterListSize
; i
++) { 
 190       (int32_t)uprv_strlen(converterList 
!= NULL 
? converterList
[i
] : ucnv_getAvailableName(i
)) + 1; 
 192   // 4-align the totalSize to 4-align the size of the serialized form 
 193   int32_t encodingStrPadding 
= totalSize 
& 3; 
 194   if (encodingStrPadding 
!= 0) { 
 195     encodingStrPadding 
= 4 - encodingStrPadding
; 
 197   newSelector
->encodingStrLength 
= totalSize 
+= encodingStrPadding
; 
 198   char* allStrings 
= (char*) uprv_malloc(totalSize
); 
 200     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 204   for (i 
= 0; i 
< converterListSize
; i
++) { 
 205     newSelector
->encodings
[i
] = allStrings
; 
 206     uprv_strcpy(newSelector
->encodings
[i
], 
 207                 converterList 
!= NULL 
? converterList
[i
] : ucnv_getAvailableName(i
)); 
 208     allStrings 
+= uprv_strlen(newSelector
->encodings
[i
]) + 1; 
 210   while (encodingStrPadding 
> 0) { 
 212     --encodingStrPadding
; 
 215   newSelector
->ownEncodingStrings 
= TRUE
; 
 216   newSelector
->encodingsCount 
= converterListSize
; 
 217   UPropsVectors 
*upvec 
= upvec_open((converterListSize
+31)/32, status
); 
 218   generateSelectorData(newSelector
.getAlias(), upvec
, excludedCodePoints
, whichSet
, status
); 
 221   if (U_FAILURE(*status
)) { 
 225   return newSelector
.orphan(); 
 228 /* close opened selector */ 
 229 U_CAPI 
void U_EXPORT2
 
 230 ucnvsel_close(UConverterSelector 
*sel
) { 
 234   if (sel
->ownEncodingStrings
) { 
 235     uprv_free(sel
->encodings
[0]); 
 237   uprv_free(sel
->encodings
); 
 241   utrie2_close(sel
->trie
); 
 242   uprv_free(sel
->swapped
); 
 246 static const UDataInfo dataInfo 
= { 
 255   { 0x43, 0x53, 0x65, 0x6c },   /* dataFormat="CSel" */ 
 256   { 1, 0, 0, 0 },               /* formatVersion */ 
 257   { 0, 0, 0, 0 }                /* dataVersion */ 
 261   UCNVSEL_INDEX_TRIE_SIZE
,      // trie size in bytes 
 262   UCNVSEL_INDEX_PV_COUNT
,       // number of uint32_t in the bit vectors 
 263   UCNVSEL_INDEX_NAMES_COUNT
,    // number of encoding names 
 264   UCNVSEL_INDEX_NAMES_LENGTH
,   // number of encoding name bytes including padding 
 265   UCNVSEL_INDEX_SIZE 
= 15,      // bytes following the DataHeader 
 266   UCNVSEL_INDEX_COUNT 
= 16 
/*
 * Serialized form of a UConverterSelector, formatVersion 1:
 *
 * The serialized form begins with a standard ICU DataHeader with a UDataInfo
 * as the template above.
 * This is followed by:
 *   int32_t indexes[UCNVSEL_INDEX_COUNT];          // see index entry constants above
 *   serialized UTrie2;                             // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
 *   uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]];  // bit vectors
 *   char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]];  // NUL-terminated strings + padding
 */
 281 /* serialize a selector */ 
 282 U_CAPI 
int32_t U_EXPORT2
 
 283 ucnvsel_serialize(const UConverterSelector
* sel
, 
 284                   void* buffer
, int32_t bufferCapacity
, UErrorCode
* status
) { 
 285   // check if already failed 
 286   if (U_FAILURE(*status
)) { 
 289   // ensure args make sense! 
 290   uint8_t *p 
= (uint8_t *)buffer
; 
 291   if (bufferCapacity 
< 0 || 
 292       (bufferCapacity 
> 0 && (p 
== NULL 
|| (U_POINTER_MASK_LSB(p
, 3) != 0))) 
 294     *status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 297   // add up the size of the serialized form 
 298   int32_t serializedTrieSize 
= utrie2_serialize(sel
->trie
, NULL
, 0, status
); 
 299   if (*status 
!= U_BUFFER_OVERFLOW_ERROR 
&& U_FAILURE(*status
)) { 
 302   *status 
= U_ZERO_ERROR
; 
 305   uprv_memset(&header
, 0, sizeof(header
)); 
 306   header
.dataHeader
.headerSize 
= (uint16_t)((sizeof(header
) + 15) & ~15); 
 307   header
.dataHeader
.magic1 
= 0xda; 
 308   header
.dataHeader
.magic2 
= 0x27; 
 309   uprv_memcpy(&header
.info
, &dataInfo
, sizeof(dataInfo
)); 
 311   int32_t indexes
[UCNVSEL_INDEX_COUNT
] = { 
 315     sel
->encodingStrLength
 
 319     header
.dataHeader
.headerSize 
+ 
 320     (int32_t)sizeof(indexes
) + 
 323     sel
->encodingStrLength
; 
 324   indexes
[UCNVSEL_INDEX_SIZE
] = totalSize 
- header
.dataHeader
.headerSize
; 
 325   if (totalSize 
> bufferCapacity
) { 
 326     *status 
= U_BUFFER_OVERFLOW_ERROR
; 
 330   int32_t length 
= header
.dataHeader
.headerSize
; 
 331   uprv_memcpy(p
, &header
, sizeof(header
)); 
 332   uprv_memset(p 
+ sizeof(header
), 0, length 
- sizeof(header
)); 
 335   length 
= (int32_t)sizeof(indexes
); 
 336   uprv_memcpy(p
, indexes
, length
); 
 339   utrie2_serialize(sel
->trie
, p
, serializedTrieSize
, status
); 
 340   p 
+= serializedTrieSize
; 
 342   length 
= sel
->pvCount 
* 4; 
 343   uprv_memcpy(p
, sel
->pv
, length
); 
 346   uprv_memcpy(p
, sel
->encodings
[0], sel
->encodingStrLength
); 
 347   p 
+= sel
->encodingStrLength
; 
 353  * swap a selector into the desired Endianness and Asciiness of 
 354  * the system. Just as FYI, selectors are always saved in the format 
 355  * of the system that created them. They are only converted if used 
 356  * on another system. In other words, selectors created on different 
 357  * system can be different even if the params are identical (endianness 
 358  * and Asciiness differences only) 
 360  * @param ds pointer to data swapper containing swapping info 
 361  * @param inData pointer to incoming data 
 362  * @param length length of inData in bytes 
 363  * @param outData pointer to output data. Capacity should 
 364  *                be at least equal to capacity of inData 
 365  * @param status an in/out ICU UErrorCode 
 366  * @return 0 on failure, number of bytes swapped on success 
 367  *         number of bytes swapped can be smaller than length 
 370 ucnvsel_swap(const UDataSwapper 
*ds
, 
 371              const void *inData
, int32_t length
, 
 372              void *outData
, UErrorCode 
*status
) { 
 373   /* udata_swapDataHeader checks the arguments */ 
 374   int32_t headerSize 
= udata_swapDataHeader(ds
, inData
, length
, outData
, status
); 
 375   if(U_FAILURE(*status
)) { 
 379   /* check data format and format version */ 
 380   const UDataInfo 
*pInfo 
= (const UDataInfo 
*)((const char *)inData 
+ 4); 
 382     pInfo
->dataFormat
[0] == 0x43 &&  /* dataFormat="CSel" */ 
 383     pInfo
->dataFormat
[1] == 0x53 && 
 384     pInfo
->dataFormat
[2] == 0x65 && 
 385     pInfo
->dataFormat
[3] == 0x6c 
 387     udata_printError(ds
, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", 
 388                      pInfo
->dataFormat
[0], pInfo
->dataFormat
[1], 
 389                      pInfo
->dataFormat
[2], pInfo
->dataFormat
[3]); 
 390     *status 
= U_INVALID_FORMAT_ERROR
; 
 393   if(pInfo
->formatVersion
[0] != 1) { 
 394     udata_printError(ds
, "ucnvsel_swap(): format version %02x is not supported\n", 
 395                      pInfo
->formatVersion
[0]); 
 396     *status 
= U_UNSUPPORTED_ERROR
; 
 401     length 
-= headerSize
; 
 403       udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", 
 405       *status 
= U_INDEX_OUTOFBOUNDS_ERROR
; 
 410   const uint8_t *inBytes 
= (const uint8_t *)inData 
+ headerSize
; 
 411   uint8_t *outBytes 
= (uint8_t *)outData 
+ headerSize
; 
 413   /* read the indexes */ 
 414   const int32_t *inIndexes 
= (const int32_t *)inBytes
; 
 417   for(i 
= 0; i 
< 16; ++i
) { 
 418     indexes
[i
] = udata_readInt32(ds
, inIndexes
[i
]); 
 421   /* get the total length of the data */ 
 422   int32_t size 
= indexes
[UCNVSEL_INDEX_SIZE
]; 
 425       udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", 
 427       *status 
= U_INDEX_OUTOFBOUNDS_ERROR
; 
 431     /* copy the data for inaccessible bytes */ 
 432     if(inBytes 
!= outBytes
) { 
 433       uprv_memcpy(outBytes
, inBytes
, size
); 
 436     int32_t offset 
= 0, count
; 
 438     /* swap the int32_t indexes[] */ 
 439     count 
= UCNVSEL_INDEX_COUNT
*4; 
 440     ds
->swapArray32(ds
, inBytes
, count
, outBytes
, status
); 
 443     /* swap the UTrie2 */ 
 444     count 
= indexes
[UCNVSEL_INDEX_TRIE_SIZE
]; 
 445     utrie2_swap(ds
, inBytes 
+ offset
, count
, outBytes 
+ offset
, status
); 
 448     /* swap the uint32_t pv[] */ 
 449     count 
= indexes
[UCNVSEL_INDEX_PV_COUNT
]*4; 
 450     ds
->swapArray32(ds
, inBytes 
+ offset
, count
, outBytes 
+ offset
, status
); 
 453     /* swap the encoding names */ 
 454     count 
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
]; 
 455     ds
->swapInvChars(ds
, inBytes 
+ offset
, count
, outBytes 
+ offset
, status
); 
 458     U_ASSERT(offset 
== size
); 
 461   return headerSize 
+ size
; 
 464 /* unserialize a selector */ 
 465 U_CAPI UConverterSelector
* U_EXPORT2
 
 466 ucnvsel_openFromSerialized(const void* buffer
, int32_t length
, UErrorCode
* status
) { 
 467   // check if already failed 
 468   if (U_FAILURE(*status
)) { 
 471   // ensure args make sense! 
 472   const uint8_t *p 
= (const uint8_t *)buffer
; 
 474       (length 
> 0 && (p 
== NULL 
|| (U_POINTER_MASK_LSB(p
, 3) != 0))) 
 476     *status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 481     // not even enough space for a minimal header 
 482     *status 
= U_INDEX_OUTOFBOUNDS_ERROR
; 
 485   const DataHeader 
*pHeader 
= (const DataHeader 
*)p
; 
 487     pHeader
->dataHeader
.magic1
==0xda && 
 488     pHeader
->dataHeader
.magic2
==0x27 && 
 489     pHeader
->info
.dataFormat
[0] == 0x43 && 
 490     pHeader
->info
.dataFormat
[1] == 0x53 && 
 491     pHeader
->info
.dataFormat
[2] == 0x65 && 
 492     pHeader
->info
.dataFormat
[3] == 0x6c 
 494     /* header not valid or dataFormat not recognized */ 
 495     *status 
= U_INVALID_FORMAT_ERROR
; 
 498   if (pHeader
->info
.formatVersion
[0] != 1) { 
 499     *status 
= U_UNSUPPORTED_ERROR
; 
 502   uint8_t* swapped 
= NULL
; 
 503   if (pHeader
->info
.isBigEndian 
!= U_IS_BIG_ENDIAN 
|| 
 504       pHeader
->info
.charsetFamily 
!= U_CHARSET_FAMILY
 
 508       udata_openSwapperForInputData(p
, length
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, status
); 
 509     int32_t totalSize 
= ucnvsel_swap(ds
, p
, -1, NULL
, status
); 
 510     if (U_FAILURE(*status
)) { 
 511       udata_closeSwapper(ds
); 
 514     if (length 
< totalSize
) { 
 515       udata_closeSwapper(ds
); 
 516       *status 
= U_INDEX_OUTOFBOUNDS_ERROR
; 
 519     swapped 
= (uint8_t*)uprv_malloc(totalSize
); 
 520     if (swapped 
== NULL
) { 
 521       udata_closeSwapper(ds
); 
 522       *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 525     ucnvsel_swap(ds
, p
, length
, swapped
, status
); 
 526     udata_closeSwapper(ds
); 
 527     if (U_FAILURE(*status
)) { 
 532     pHeader 
= (const DataHeader 
*)p
; 
 534   if (length 
< (pHeader
->dataHeader
.headerSize 
+ 16 * 4)) { 
 535     // not even enough space for the header and the indexes 
 537     *status 
= U_INDEX_OUTOFBOUNDS_ERROR
; 
 540   p 
+= pHeader
->dataHeader
.headerSize
; 
 541   length 
-= pHeader
->dataHeader
.headerSize
; 
 543   const int32_t *indexes 
= (const int32_t *)p
; 
 544   if (length 
< indexes
[UCNVSEL_INDEX_SIZE
]) { 
 546     *status 
= U_INDEX_OUTOFBOUNDS_ERROR
; 
 549   p 
+= UCNVSEL_INDEX_COUNT 
* 4; 
 550   // create and populate the selector object 
 551   UConverterSelector
* sel 
= (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
)); 
 553     (char **)uprv_malloc( 
 554       indexes
[UCNVSEL_INDEX_NAMES_COUNT
] * sizeof(char *)); 
 555   if (sel 
== NULL 
|| encodings 
== NULL
) { 
 558     uprv_free(encodings
); 
 559     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 562   uprv_memset(sel
, 0, sizeof(UConverterSelector
)); 
 563   sel
->pvCount 
= indexes
[UCNVSEL_INDEX_PV_COUNT
]; 
 564   sel
->encodings 
= encodings
; 
 565   sel
->encodingsCount 
= indexes
[UCNVSEL_INDEX_NAMES_COUNT
]; 
 566   sel
->encodingStrLength 
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
]; 
 567   sel
->swapped 
= swapped
; 
 569   sel
->trie 
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
, 
 570                                         p
, indexes
[UCNVSEL_INDEX_TRIE_SIZE
], NULL
, 
 572   p 
+= indexes
[UCNVSEL_INDEX_TRIE_SIZE
]; 
 573   if (U_FAILURE(*status
)) { 
 578   sel
->pv 
= (uint32_t *)p
; 
 579   p 
+= sel
->pvCount 
* 4; 
 582   for (int32_t i 
= 0; i 
< sel
->encodingsCount
; ++i
) { 
 583     sel
->encodings
[i
] = s
; 
 584     s 
+= uprv_strlen(s
) + 1; 
 586   p 
+= sel
->encodingStrLength
; 
 591 // a bunch of functions for the enumeration thingie! Nothing fancy here. Just 
 592 // iterate over the selected encodings 
 597   const UConverterSelector
* sel
; 
 602 static void U_CALLCONV
 
 603 ucnvsel_close_selector_iterator(UEnumeration 
*enumerator
) { 
 604   uprv_free(((Enumerator
*)(enumerator
->context
))->index
); 
 605   uprv_free(enumerator
->context
); 
 606   uprv_free(enumerator
); 
 610 static int32_t U_CALLCONV
 
 611 ucnvsel_count_encodings(UEnumeration 
*enumerator
, UErrorCode 
*status
) { 
 612   // check if already failed 
 613   if (U_FAILURE(*status
)) { 
 616   return ((Enumerator
*)(enumerator
->context
))->length
; 
 620 static const char* U_CALLCONV 
ucnvsel_next_encoding(UEnumeration
* enumerator
, 
 621                                                  int32_t* resultLength
, 
 622                                                  UErrorCode
* status
) { 
 623   // check if already failed 
 624   if (U_FAILURE(*status
)) { 
 628   int16_t cur 
= ((Enumerator
*)(enumerator
->context
))->cur
; 
 629   const UConverterSelector
* sel
; 
 631   if (cur 
>= ((Enumerator
*)(enumerator
->context
))->length
) { 
 634   sel 
= ((Enumerator
*)(enumerator
->context
))->sel
; 
 635   result 
= sel
->encodings
[((Enumerator
*)(enumerator
->context
))->index
[cur
] ]; 
 636   ((Enumerator
*)(enumerator
->context
))->cur
++; 
 638     *resultLength 
= (int32_t)uprv_strlen(result
); 
 643 static void U_CALLCONV 
ucnvsel_reset_iterator(UEnumeration
* enumerator
, 
 644                                            UErrorCode
* status
) { 
 645   // check if already failed 
 646   if (U_FAILURE(*status
)) { 
 649   ((Enumerator
*)(enumerator
->context
))->cur 
= 0; 
 655 static const UEnumeration defaultEncodings 
= { 
 658     ucnvsel_close_selector_iterator
, 
 659     ucnvsel_count_encodings
, 
 661     ucnvsel_next_encoding
,  
 662     ucnvsel_reset_iterator
 
 666 // internal fn to intersect two sets of masks 
 667 // returns whether the mask has reduced to all zeros 
 668 static UBool 
intersectMasks(uint32_t* dest
, const uint32_t* source1
, int32_t len
) { 
 670   uint32_t oredDest 
= 0; 
 671   for (i 
= 0 ; i 
< len 
; ++i
) { 
 672     oredDest 
|= (dest
[i
] &= source1
[i
]); 
 674   return oredDest 
== 0; 
 677 // internal fn to count how many 1's are there in a mask 
 678 // algorithm taken from  http://graphics.stanford.edu/~seander/bithacks.html 
 679 static int16_t countOnes(uint32_t* mask
, int32_t len
) { 
 680   int32_t i
, totalOnes 
= 0; 
 681   for (i 
= 0 ; i 
< len 
; ++i
) { 
 682     uint32_t ent 
= mask
[i
]; 
 683     for (; ent
; totalOnes
++) 
 685       ent 
&= ent 
- 1; // clear the least significant bit set 
 688   return static_cast<int16_t>(totalOnes
); 
 692 /* internal function! */ 
 693 static UEnumeration 
*selectForMask(const UConverterSelector
* sel
, 
 694                                    uint32_t *mask
, UErrorCode 
*status
) { 
 695   // this is the context we will use. Store a table of indices to which 
 696   // encodings are legit. 
 697   struct Enumerator
* result 
= (Enumerator
*)uprv_malloc(sizeof(Enumerator
)); 
 698   if (result 
== NULL
) { 
 700     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 703   result
->index 
= NULL
;  // this will be allocated later! 
 704   result
->length 
= result
->cur 
= 0; 
 707   UEnumeration 
*en 
= (UEnumeration 
*)uprv_malloc(sizeof(UEnumeration
)); 
 709     // TODO(markus): Combine Enumerator and UEnumeration into one struct. 
 712     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 715   memcpy(en
, &defaultEncodings
, sizeof(UEnumeration
)); 
 716   en
->context 
= result
; 
 718   int32_t columns 
= (sel
->encodingsCount
+31)/32; 
 719   int16_t numOnes 
= countOnes(mask
, columns
); 
 720   // now, we know the exact space we need for index 
 722     result
->index 
= (int16_t*) uprv_malloc(numOnes 
* sizeof(int16_t)); 
 726     for (j 
= 0 ; j 
< columns
; j
++) { 
 727       uint32_t v 
= mask
[j
]; 
 728       for (i 
= 0 ; i 
< 32 && k 
< sel
->encodingsCount
; i
++, k
++) { 
 730           result
->index
[result
->length
++] = k
; 
 735   } //otherwise, index will remain NULL (and will never be touched by 
 736     //the enumerator code anyway) 
 741 /* check a string against the selector - UTF16 version */ 
 742 U_CAPI UEnumeration 
* U_EXPORT2
 
 743 ucnvsel_selectForString(const UConverterSelector
* sel
, 
 744                         const UChar 
*s
, int32_t length
, UErrorCode 
*status
) { 
 745   // check if already failed 
 746   if (U_FAILURE(*status
)) { 
 749   // ensure args make sense! 
 750   if (sel 
== NULL 
|| (s 
== NULL 
&& length 
!= 0)) { 
 751     *status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 755   int32_t columns 
= (sel
->encodingsCount
+31)/32; 
 756   uint32_t* mask 
= (uint32_t*) uprv_malloc(columns 
* 4); 
 758     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 761   uprv_memset(mask
, ~0, columns 
*4); 
 771     while (limit 
== NULL 
? *s 
!= 0 : s 
!= limit
) { 
 774       UTRIE2_U16_NEXT16(sel
->trie
, s
, limit
, c
, pvIndex
); 
 775       if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) { 
 780   return selectForMask(sel
, mask
, status
); 
 783 /* check a string against the selector - UTF8 version */ 
 784 U_CAPI UEnumeration 
* U_EXPORT2
 
 785 ucnvsel_selectForUTF8(const UConverterSelector
* sel
, 
 786                       const char *s
, int32_t length
, UErrorCode 
*status
) { 
 787   // check if already failed 
 788   if (U_FAILURE(*status
)) { 
 791   // ensure args make sense! 
 792   if (sel 
== NULL 
|| (s 
== NULL 
&& length 
!= 0)) { 
 793     *status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 797   int32_t columns 
= (sel
->encodingsCount
+31)/32; 
 798   uint32_t* mask 
= (uint32_t*) uprv_malloc(columns 
* 4); 
 800     *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 803   uprv_memset(mask
, ~0, columns 
*4); 
 806     length 
= (int32_t)uprv_strlen(s
); 
 810     const char *limit 
= s 
+ length
; 
 814       UTRIE2_U8_NEXT16(sel
->trie
, s
, limit
, pvIndex
); 
 815       if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) { 
 820   return selectForMask(sel
, mask
, status
); 
 823 #endif  // !UCONFIG_NO_CONVERSION