/*
*******************************************************************************
*
*   Copyright (C) 2008-2009, International Business Machines
*   Corporation, Google and others.  All Rights Reserved.
*
*******************************************************************************
*/
// Author : eldawy@google.com (Mohamed Eldawy)
//
// Purpose: To generate a list of encodings capable of handling
// a given Unicode text
//
// Started 09-April-2008

/**
 * This is an implementation of an encoding selector.
 * The goal is, given a Unicode string, to find the encodings
 * this string can be mapped to. To make processing faster,
 * a trie is built when you call ucnvsel_open() that
 * stores all encodings a code point can map to.
 */
27 #include "unicode/ucnvsel.h"
31 #include "unicode/uchar.h"
32 #include "unicode/uniset.h"
33 #include "unicode/ucnv.h"
34 #include "unicode/ustring.h"
35 #include "unicode/uchriter.h"
46 struct UConverterSelector
{
47 UTrie2
*trie
; // 16 bit trie containing offsets into pv
48 uint32_t* pv
; // table of bits!
50 char** encodings
; // which encodings did user ask to use?
51 int32_t encodingsCount
;
52 int32_t encodingStrLength
;
54 UBool ownPv
, ownEncodingStrings
;
57 static void generateSelectorData(UConverterSelector
* result
,
59 const USet
* excludedCodePoints
,
60 const UConverterUnicodeSet whichSet
,
62 if (U_FAILURE(*status
)) {
66 int32_t columns
= (result
->encodingsCount
+31)/32;
68 // set errorValue to all-ones
69 for (int32_t col
= 0; col
< columns
; col
++) {
70 upvec_setValue(upvec
, UPVEC_ERROR_VALUE_CP
, UPVEC_ERROR_VALUE_CP
,
74 for (int32_t i
= 0; i
< result
->encodingsCount
; ++i
) {
79 UConverter
* test_converter
= ucnv_open(result
->encodings
[i
], status
);
80 if (U_FAILURE(*status
)) {
83 USet
* unicode_point_set
;
84 unicode_point_set
= uset_open(1, 0); // empty set
86 ucnv_getUnicodeSet(test_converter
, unicode_point_set
,
88 if (U_FAILURE(*status
)) {
89 ucnv_close(test_converter
);
95 // now iterate over intervals on set i!
96 item_count
= uset_getItemCount(unicode_point_set
);
98 for (j
= 0; j
< item_count
; ++j
) {
101 UErrorCode smallStatus
= U_ZERO_ERROR
;
102 uset_getItem(unicode_point_set
, j
, &start_char
, &end_char
, NULL
, 0,
104 if (U_FAILURE(smallStatus
)) {
105 // this will be reached for the converters that fill the set with
106 // strings. Those should be ignored by our system
108 upvec_setValue(upvec
, start_char
, end_char
, column
, ~0, mask
,
112 ucnv_close(test_converter
);
113 uset_close(unicode_point_set
);
114 if (U_FAILURE(*status
)) {
119 // handle excluded encodings! Simply set their values to all 1's in the upvec
120 if (excludedCodePoints
) {
121 int32_t item_count
= uset_getItemCount(excludedCodePoints
);
122 for (int32_t j
= 0; j
< item_count
; ++j
) {
126 uset_getItem(excludedCodePoints
, j
, &start_char
, &end_char
, NULL
, 0,
128 for (int32_t col
= 0; col
< columns
; col
++) {
129 upvec_setValue(upvec
, start_char
, end_char
, col
, ~0, ~0,
135 // alright. Now, let's put things in the same exact form you'd get when you
136 // unserialize things.
137 result
->trie
= upvec_compactToUTrie2WithRowIndexes(upvec
, status
);
138 result
->pv
= upvec_cloneArray(upvec
, &result
->pvCount
, NULL
, status
);
139 result
->pvCount
*= columns
; // number of uint32_t = rows * columns
140 result
->ownPv
= TRUE
;
143 /* open a selector. If converterListSize is 0, build for all converters.
144 If excludedCodePoints is NULL, don't exclude any codepoints */
145 U_CAPI UConverterSelector
* U_EXPORT2
146 ucnvsel_open(const char* const* converterList
, int32_t converterListSize
,
147 const USet
* excludedCodePoints
,
148 const UConverterUnicodeSet whichSet
, UErrorCode
* status
) {
149 // check if already failed
150 if (U_FAILURE(*status
)) {
153 // ensure args make sense!
154 if (converterListSize
< 0 || (converterList
== NULL
&& converterListSize
!= 0)) {
155 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
159 // allocate a new converter
160 LocalUConverterSelectorPointer
newSelector(
161 (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
)));
162 if (newSelector
.isNull()) {
163 *status
= U_MEMORY_ALLOCATION_ERROR
;
166 uprv_memset(newSelector
.getAlias(), 0, sizeof(UConverterSelector
));
168 if (converterListSize
== 0) {
169 converterList
= NULL
;
170 converterListSize
= ucnv_countAvailable();
172 newSelector
->encodings
=
173 (char**)uprv_malloc(converterListSize
* sizeof(char*));
174 if (!newSelector
->encodings
) {
175 *status
= U_MEMORY_ALLOCATION_ERROR
;
178 newSelector
->encodings
[0] = NULL
; // now we can call ucnvsel_close()
180 // make a backup copy of the list of converters
181 int32_t totalSize
= 0;
183 for (i
= 0; i
< converterListSize
; i
++) {
185 (int32_t)uprv_strlen(converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
)) + 1;
187 // 4-align the totalSize to 4-align the size of the serialized form
188 int32_t encodingStrPadding
= totalSize
& 3;
189 if (encodingStrPadding
!= 0) {
190 encodingStrPadding
= 4 - encodingStrPadding
;
192 newSelector
->encodingStrLength
= totalSize
+= encodingStrPadding
;
193 char* allStrings
= (char*) uprv_malloc(totalSize
);
195 *status
= U_MEMORY_ALLOCATION_ERROR
;
199 for (i
= 0; i
< converterListSize
; i
++) {
200 newSelector
->encodings
[i
] = allStrings
;
201 uprv_strcpy(newSelector
->encodings
[i
],
202 converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
));
203 allStrings
+= uprv_strlen(newSelector
->encodings
[i
]) + 1;
205 while (encodingStrPadding
> 0) {
207 --encodingStrPadding
;
210 newSelector
->ownEncodingStrings
= TRUE
;
211 newSelector
->encodingsCount
= converterListSize
;
212 UPropsVectors
*upvec
= upvec_open((converterListSize
+31)/32, status
);
213 generateSelectorData(newSelector
.getAlias(), upvec
, excludedCodePoints
, whichSet
, status
);
216 if (U_FAILURE(*status
)) {
220 return newSelector
.orphan();
223 /* close opened selector */
224 U_CAPI
void U_EXPORT2
225 ucnvsel_close(UConverterSelector
*sel
) {
229 if (sel
->ownEncodingStrings
) {
230 uprv_free(sel
->encodings
[0]);
232 uprv_free(sel
->encodings
);
236 utrie2_close(sel
->trie
);
237 uprv_free(sel
->swapped
);
241 static const UDataInfo dataInfo
= {
250 { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */
251 { 1, 0, 0, 0 }, /* formatVersion */
252 { 0, 0, 0, 0 } /* dataVersion */
// Positions in the int32_t indexes[] array of the serialized form.
enum {
  UCNVSEL_INDEX_TRIE_SIZE,      // trie size in bytes
  UCNVSEL_INDEX_PV_COUNT,       // number of uint32_t in the bit vectors
  UCNVSEL_INDEX_NAMES_COUNT,    // number of encoding names
  UCNVSEL_INDEX_NAMES_LENGTH,   // number of encoding name bytes including padding
  UCNVSEL_INDEX_SIZE = 15,      // bytes following the DataHeader
  UCNVSEL_INDEX_COUNT = 16
};
/*
 * Serialized form of a UConverterSelector, formatVersion 1:
 *
 * The serialized form begins with a standard ICU DataHeader with a UDataInfo
 * as the template above.
 * This is followed by:
 *   int32_t indexes[UCNVSEL_INDEX_COUNT];          // see index entry constants above
 *   serialized UTrie2;                             // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
 *   uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]];  // bit vectors
 *   char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]];  // NUL-terminated strings + padding
 */
276 /* serialize a selector */
277 U_CAPI
int32_t U_EXPORT2
278 ucnvsel_serialize(const UConverterSelector
* sel
,
279 void* buffer
, int32_t bufferCapacity
, UErrorCode
* status
) {
280 // check if already failed
281 if (U_FAILURE(*status
)) {
284 // ensure args make sense!
285 uint8_t *p
= (uint8_t *)buffer
;
286 if (bufferCapacity
< 0 ||
287 (bufferCapacity
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
289 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
292 // add up the size of the serialized form
293 int32_t serializedTrieSize
= utrie2_serialize(sel
->trie
, NULL
, 0, status
);
294 if (*status
!= U_BUFFER_OVERFLOW_ERROR
&& U_FAILURE(*status
)) {
297 *status
= U_ZERO_ERROR
;
300 uprv_memset(&header
, 0, sizeof(header
));
301 header
.dataHeader
.headerSize
= (uint16_t)((sizeof(header
) + 15) & ~15);
302 header
.dataHeader
.magic1
= 0xda;
303 header
.dataHeader
.magic2
= 0x27;
304 uprv_memcpy(&header
.info
, &dataInfo
, sizeof(dataInfo
));
306 int32_t indexes
[UCNVSEL_INDEX_COUNT
] = {
310 sel
->encodingStrLength
314 header
.dataHeader
.headerSize
+
315 (int32_t)sizeof(indexes
) +
318 sel
->encodingStrLength
;
319 indexes
[UCNVSEL_INDEX_SIZE
] = totalSize
- header
.dataHeader
.headerSize
;
320 if (totalSize
> bufferCapacity
) {
321 *status
= U_BUFFER_OVERFLOW_ERROR
;
325 int32_t length
= header
.dataHeader
.headerSize
;
326 uprv_memcpy(p
, &header
, sizeof(header
));
327 uprv_memset(p
+ sizeof(header
), 0, length
- sizeof(header
));
330 length
= (int32_t)sizeof(indexes
);
331 uprv_memcpy(p
, indexes
, length
);
334 utrie2_serialize(sel
->trie
, p
, serializedTrieSize
, status
);
335 p
+= serializedTrieSize
;
337 length
= sel
->pvCount
* 4;
338 uprv_memcpy(p
, sel
->pv
, length
);
341 uprv_memcpy(p
, sel
->encodings
[0], sel
->encodingStrLength
);
342 p
+= sel
->encodingStrLength
;
348 * swap a selector into the desired Endianness and Asciiness of
349 * the system. Just as FYI, selectors are always saved in the format
350 * of the system that created them. They are only converted if used
351 * on another system. In other words, selectors created on different
352 * system can be different even if the params are identical (endianness
353 * and Asciiness differences only)
355 * @param ds pointer to data swapper containing swapping info
356 * @param inData pointer to incoming data
357 * @param length length of inData in bytes
358 * @param outData pointer to output data. Capacity should
359 * be at least equal to capacity of inData
360 * @param status an in/out ICU UErrorCode
361 * @return 0 on failure, number of bytes swapped on success
362 * number of bytes swapped can be smaller than length
365 ucnvsel_swap(const UDataSwapper
*ds
,
366 const void *inData
, int32_t length
,
367 void *outData
, UErrorCode
*status
) {
368 /* udata_swapDataHeader checks the arguments */
369 int32_t headerSize
= udata_swapDataHeader(ds
, inData
, length
, outData
, status
);
370 if(U_FAILURE(*status
)) {
374 /* check data format and format version */
375 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+ 4);
377 pInfo
->dataFormat
[0] == 0x43 && /* dataFormat="CSel" */
378 pInfo
->dataFormat
[1] == 0x53 &&
379 pInfo
->dataFormat
[2] == 0x65 &&
380 pInfo
->dataFormat
[3] == 0x6c
382 udata_printError(ds
, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
383 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
384 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3]);
385 *status
= U_INVALID_FORMAT_ERROR
;
388 if(pInfo
->formatVersion
[0] != 1) {
389 udata_printError(ds
, "ucnvsel_swap(): format version %02x is not supported\n",
390 pInfo
->formatVersion
[0]);
391 *status
= U_UNSUPPORTED_ERROR
;
396 length
-= headerSize
;
398 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
400 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
405 const uint8_t *inBytes
= (const uint8_t *)inData
+ headerSize
;
406 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
408 /* read the indexes */
409 const int32_t *inIndexes
= (const int32_t *)inBytes
;
412 for(i
= 0; i
< 16; ++i
) {
413 indexes
[i
] = udata_readInt32(ds
, inIndexes
[i
]);
416 /* get the total length of the data */
417 int32_t size
= indexes
[UCNVSEL_INDEX_SIZE
];
420 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
422 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
426 /* copy the data for inaccessible bytes */
427 if(inBytes
!= outBytes
) {
428 uprv_memcpy(outBytes
, inBytes
, size
);
431 int32_t offset
= 0, count
;
433 /* swap the int32_t indexes[] */
434 count
= UCNVSEL_INDEX_COUNT
*4;
435 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, status
);
438 /* swap the UTrie2 */
439 count
= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
440 utrie2_swap(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
443 /* swap the uint32_t pv[] */
444 count
= indexes
[UCNVSEL_INDEX_PV_COUNT
]*4;
445 ds
->swapArray32(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
448 /* swap the encoding names */
449 count
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
450 ds
->swapInvChars(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
453 U_ASSERT(offset
== size
);
456 return headerSize
+ size
;
459 /* unserialize a selector */
460 U_CAPI UConverterSelector
* U_EXPORT2
461 ucnvsel_openFromSerialized(const void* buffer
, int32_t length
, UErrorCode
* status
) {
462 // check if already failed
463 if (U_FAILURE(*status
)) {
466 // ensure args make sense!
467 const uint8_t *p
= (const uint8_t *)buffer
;
469 (length
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
471 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
476 // not even enough space for a minimal header
477 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
480 const DataHeader
*pHeader
= (const DataHeader
*)p
;
482 pHeader
->dataHeader
.magic1
==0xda &&
483 pHeader
->dataHeader
.magic2
==0x27 &&
484 pHeader
->info
.dataFormat
[0] == 0x43 &&
485 pHeader
->info
.dataFormat
[1] == 0x53 &&
486 pHeader
->info
.dataFormat
[2] == 0x65 &&
487 pHeader
->info
.dataFormat
[3] == 0x6c
489 /* header not valid or dataFormat not recognized */
490 *status
= U_INVALID_FORMAT_ERROR
;
493 if (pHeader
->info
.formatVersion
[0] != 1) {
494 *status
= U_UNSUPPORTED_ERROR
;
497 uint8_t* swapped
= NULL
;
498 if (pHeader
->info
.isBigEndian
!= U_IS_BIG_ENDIAN
||
499 pHeader
->info
.charsetFamily
!= U_CHARSET_FAMILY
503 udata_openSwapperForInputData(p
, length
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, status
);
504 int32_t totalSize
= ucnvsel_swap(ds
, p
, -1, NULL
, status
);
505 if (U_FAILURE(*status
)) {
506 udata_closeSwapper(ds
);
509 if (length
< totalSize
) {
510 udata_closeSwapper(ds
);
511 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
514 swapped
= (uint8_t*)uprv_malloc(totalSize
);
515 if (swapped
== NULL
) {
516 udata_closeSwapper(ds
);
517 *status
= U_MEMORY_ALLOCATION_ERROR
;
520 ucnvsel_swap(ds
, p
, length
, swapped
, status
);
521 udata_closeSwapper(ds
);
522 if (U_FAILURE(*status
)) {
527 pHeader
= (const DataHeader
*)p
;
529 if (length
< (pHeader
->dataHeader
.headerSize
+ 16 * 4)) {
530 // not even enough space for the header and the indexes
532 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
535 p
+= pHeader
->dataHeader
.headerSize
;
536 length
-= pHeader
->dataHeader
.headerSize
;
538 const int32_t *indexes
= (const int32_t *)p
;
539 if (length
< indexes
[UCNVSEL_INDEX_SIZE
]) {
541 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
544 p
+= UCNVSEL_INDEX_COUNT
* 4;
545 // create and populate the selector object
546 UConverterSelector
* sel
= (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
));
548 (char **)uprv_malloc(
549 indexes
[UCNVSEL_INDEX_NAMES_COUNT
] * sizeof(char *));
550 if (sel
== NULL
|| encodings
== NULL
) {
553 uprv_free(encodings
);
554 *status
= U_MEMORY_ALLOCATION_ERROR
;
557 uprv_memset(sel
, 0, sizeof(UConverterSelector
));
558 sel
->pvCount
= indexes
[UCNVSEL_INDEX_PV_COUNT
];
559 sel
->encodings
= encodings
;
560 sel
->encodingsCount
= indexes
[UCNVSEL_INDEX_NAMES_COUNT
];
561 sel
->encodingStrLength
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
562 sel
->swapped
= swapped
;
564 sel
->trie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
565 p
, indexes
[UCNVSEL_INDEX_TRIE_SIZE
], NULL
,
567 p
+= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
568 if (U_FAILURE(*status
)) {
573 sel
->pv
= (uint32_t *)p
;
574 p
+= sel
->pvCount
* 4;
577 for (int32_t i
= 0; i
< sel
->encodingsCount
; ++i
) {
578 sel
->encodings
[i
] = s
;
579 s
+= uprv_strlen(s
) + 1;
581 p
+= sel
->encodingStrLength
;
586 // a bunch of functions for the enumeration thingie! Nothing fancy here. Just
587 // iterate over the selected encodings
592 const UConverterSelector
* sel
;
597 static void U_CALLCONV
598 ucnvsel_close_selector_iterator(UEnumeration
*enumerator
) {
599 uprv_free(((Enumerator
*)(enumerator
->context
))->index
);
600 uprv_free(enumerator
->context
);
601 uprv_free(enumerator
);
605 static int32_t U_CALLCONV
606 ucnvsel_count_encodings(UEnumeration
*enumerator
, UErrorCode
*status
) {
607 // check if already failed
608 if (U_FAILURE(*status
)) {
611 return ((Enumerator
*)(enumerator
->context
))->length
;
615 static const char* U_CALLCONV
ucnvsel_next_encoding(UEnumeration
* enumerator
,
616 int32_t* resultLength
,
617 UErrorCode
* status
) {
618 // check if already failed
619 if (U_FAILURE(*status
)) {
623 int16_t cur
= ((Enumerator
*)(enumerator
->context
))->cur
;
624 const UConverterSelector
* sel
;
626 if (cur
>= ((Enumerator
*)(enumerator
->context
))->length
) {
629 sel
= ((Enumerator
*)(enumerator
->context
))->sel
;
630 result
= sel
->encodings
[((Enumerator
*)(enumerator
->context
))->index
[cur
] ];
631 ((Enumerator
*)(enumerator
->context
))->cur
++;
633 *resultLength
= (int32_t)uprv_strlen(result
);
638 static void U_CALLCONV
ucnvsel_reset_iterator(UEnumeration
* enumerator
,
639 UErrorCode
* status
) {
640 // check if already failed
641 if (U_FAILURE(*status
)) {
644 ((Enumerator
*)(enumerator
->context
))->cur
= 0;
650 static const UEnumeration defaultEncodings
= {
653 ucnvsel_close_selector_iterator
,
654 ucnvsel_count_encodings
,
656 ucnvsel_next_encoding
,
657 ucnvsel_reset_iterator
661 // internal fn to intersect two sets of masks
662 // returns whether the mask has reduced to all zeros
663 static UBool
intersectMasks(uint32_t* dest
, const uint32_t* source1
, int32_t len
) {
665 uint32_t oredDest
= 0;
666 for (i
= 0 ; i
< len
; ++i
) {
667 oredDest
|= (dest
[i
] &= source1
[i
]);
669 return oredDest
== 0;
// internal fn to count how many 1's are there in a mask
// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
//
// mask: array of len 32-bit words; returns the total number of set bits.
static int16_t countOnes(uint32_t* mask, int32_t len) {
  int32_t i, totalOnes = 0;
  for (i = 0 ; i < len ; ++i) {
    uint32_t ent = mask[i];
    // one iteration per set bit
    for (; ent; totalOnes++)
    {
      ent &= ent - 1; // clear the least significant bit set
    }
  }
  return totalOnes;
}
687 /* internal function! */
688 static UEnumeration
*selectForMask(const UConverterSelector
* sel
,
689 uint32_t *mask
, UErrorCode
*status
) {
690 // this is the context we will use. Store a table of indices to which
691 // encodings are legit.
692 struct Enumerator
* result
= (Enumerator
*)uprv_malloc(sizeof(Enumerator
));
693 if (result
== NULL
) {
695 *status
= U_MEMORY_ALLOCATION_ERROR
;
698 result
->index
= NULL
; // this will be allocated later!
699 result
->length
= result
->cur
= 0;
702 UEnumeration
*en
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
704 // TODO(markus): Combine Enumerator and UEnumeration into one struct.
707 *status
= U_MEMORY_ALLOCATION_ERROR
;
710 memcpy(en
, &defaultEncodings
, sizeof(UEnumeration
));
711 en
->context
= result
;
713 int32_t columns
= (sel
->encodingsCount
+31)/32;
714 int16_t numOnes
= countOnes(mask
, columns
);
715 // now, we know the exact space we need for index
717 result
->index
= (int16_t*) uprv_malloc(numOnes
* sizeof(int16_t));
721 for (j
= 0 ; j
< columns
; j
++) {
722 uint32_t v
= mask
[j
];
723 for (i
= 0 ; i
< 32 && k
< sel
->encodingsCount
; i
++, k
++) {
725 result
->index
[result
->length
++] = k
;
730 } //otherwise, index will remain NULL (and will never be touched by
731 //the enumerator code anyway)
736 /* check a string against the selector - UTF16 version */
737 U_CAPI UEnumeration
* U_EXPORT2
738 ucnvsel_selectForString(const UConverterSelector
* sel
,
739 const UChar
*s
, int32_t length
, UErrorCode
*status
) {
740 // check if already failed
741 if (U_FAILURE(*status
)) {
744 // ensure args make sense!
745 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
746 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
750 int32_t columns
= (sel
->encodingsCount
+31)/32;
751 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
753 *status
= U_MEMORY_ALLOCATION_ERROR
;
756 uprv_memset(mask
, ~0, columns
*4);
765 while (limit
== NULL
? *s
!= 0 : s
!= limit
) {
768 UTRIE2_U16_NEXT16(sel
->trie
, s
, limit
, c
, pvIndex
);
769 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
773 return selectForMask(sel
, mask
, status
);
776 /* check a string against the selector - UTF8 version */
777 U_CAPI UEnumeration
* U_EXPORT2
778 ucnvsel_selectForUTF8(const UConverterSelector
* sel
,
779 const char *s
, int32_t length
, UErrorCode
*status
) {
780 // check if already failed
781 if (U_FAILURE(*status
)) {
784 // ensure args make sense!
785 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
786 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
790 int32_t columns
= (sel
->encodingsCount
+31)/32;
791 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
793 *status
= U_MEMORY_ALLOCATION_ERROR
;
796 uprv_memset(mask
, ~0, columns
*4);
799 length
= (int32_t)uprv_strlen(s
);
801 const char *limit
= s
+ length
;
805 UTRIE2_U8_NEXT16(sel
->trie
, s
, limit
, pvIndex
);
806 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
810 return selectForMask(sel
, mask
, status
);