2 *******************************************************************************
4 * Copyright (C) 2008-2011, International Business Machines
5 * Corporation, Google and others. All Rights Reserved.
7 *******************************************************************************
9 // Author : eldawy@google.com (Mohamed Eldawy)
12 // Purpose: To generate a list of encodings capable of handling
13 // a given Unicode text
15 // Started 09-April-2008
20 * This is an implementation of an encoding selector.
21 * The goal is, given a unicode string, find the encodings
22 * this string can be mapped to. To make processing faster
23 * a trie is built when you call ucnvsel_open() that
24 * stores all encodings a codepoint can map to
27 #include "unicode/ucnvsel.h"
29 #if !UCONFIG_NO_CONVERSION
33 #include "unicode/uchar.h"
34 #include "unicode/uniset.h"
35 #include "unicode/ucnv.h"
36 #include "unicode/ustring.h"
37 #include "unicode/uchriter.h"
48 struct UConverterSelector
{
49 UTrie2
*trie
; // 16 bit trie containing offsets into pv
50 uint32_t* pv
; // table of bits!
52 char** encodings
; // which encodings did user ask to use?
53 int32_t encodingsCount
;
54 int32_t encodingStrLength
;
56 UBool ownPv
, ownEncodingStrings
;
59 static void generateSelectorData(UConverterSelector
* result
,
61 const USet
* excludedCodePoints
,
62 const UConverterUnicodeSet whichSet
,
64 if (U_FAILURE(*status
)) {
68 int32_t columns
= (result
->encodingsCount
+31)/32;
70 // set errorValue to all-ones
71 for (int32_t col
= 0; col
< columns
; col
++) {
72 upvec_setValue(upvec
, UPVEC_ERROR_VALUE_CP
, UPVEC_ERROR_VALUE_CP
,
76 for (int32_t i
= 0; i
< result
->encodingsCount
; ++i
) {
81 UConverter
* test_converter
= ucnv_open(result
->encodings
[i
], status
);
82 if (U_FAILURE(*status
)) {
85 USet
* unicode_point_set
;
86 unicode_point_set
= uset_open(1, 0); // empty set
88 ucnv_getUnicodeSet(test_converter
, unicode_point_set
,
90 if (U_FAILURE(*status
)) {
91 ucnv_close(test_converter
);
97 // now iterate over intervals on set i!
98 item_count
= uset_getItemCount(unicode_point_set
);
100 for (j
= 0; j
< item_count
; ++j
) {
103 UErrorCode smallStatus
= U_ZERO_ERROR
;
104 uset_getItem(unicode_point_set
, j
, &start_char
, &end_char
, NULL
, 0,
106 if (U_FAILURE(smallStatus
)) {
107 // this will be reached for the converters that fill the set with
108 // strings. Those should be ignored by our system
110 upvec_setValue(upvec
, start_char
, end_char
, column
, ~0, mask
,
114 ucnv_close(test_converter
);
115 uset_close(unicode_point_set
);
116 if (U_FAILURE(*status
)) {
121 // handle excluded encodings! Simply set their values to all 1's in the upvec
122 if (excludedCodePoints
) {
123 int32_t item_count
= uset_getItemCount(excludedCodePoints
);
124 for (int32_t j
= 0; j
< item_count
; ++j
) {
128 uset_getItem(excludedCodePoints
, j
, &start_char
, &end_char
, NULL
, 0,
130 for (int32_t col
= 0; col
< columns
; col
++) {
131 upvec_setValue(upvec
, start_char
, end_char
, col
, ~0, ~0,
137 // alright. Now, let's put things in the same exact form you'd get when you
138 // unserialize things.
139 result
->trie
= upvec_compactToUTrie2WithRowIndexes(upvec
, status
);
140 result
->pv
= upvec_cloneArray(upvec
, &result
->pvCount
, NULL
, status
);
141 result
->pvCount
*= columns
; // number of uint32_t = rows * columns
142 result
->ownPv
= TRUE
;
145 /* open a selector. If converterListSize is 0, build for all converters.
146 If excludedCodePoints is NULL, don't exclude any codepoints */
147 U_CAPI UConverterSelector
* U_EXPORT2
148 ucnvsel_open(const char* const* converterList
, int32_t converterListSize
,
149 const USet
* excludedCodePoints
,
150 const UConverterUnicodeSet whichSet
, UErrorCode
* status
) {
151 // check if already failed
152 if (U_FAILURE(*status
)) {
155 // ensure args make sense!
156 if (converterListSize
< 0 || (converterList
== NULL
&& converterListSize
!= 0)) {
157 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
161 // allocate a new converter
162 LocalUConverterSelectorPointer
newSelector(
163 (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
)));
164 if (newSelector
.isNull()) {
165 *status
= U_MEMORY_ALLOCATION_ERROR
;
168 uprv_memset(newSelector
.getAlias(), 0, sizeof(UConverterSelector
));
170 if (converterListSize
== 0) {
171 converterList
= NULL
;
172 converterListSize
= ucnv_countAvailable();
174 newSelector
->encodings
=
175 (char**)uprv_malloc(converterListSize
* sizeof(char*));
176 if (!newSelector
->encodings
) {
177 *status
= U_MEMORY_ALLOCATION_ERROR
;
180 newSelector
->encodings
[0] = NULL
; // now we can call ucnvsel_close()
182 // make a backup copy of the list of converters
183 int32_t totalSize
= 0;
185 for (i
= 0; i
< converterListSize
; i
++) {
187 (int32_t)uprv_strlen(converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
)) + 1;
189 // 4-align the totalSize to 4-align the size of the serialized form
190 int32_t encodingStrPadding
= totalSize
& 3;
191 if (encodingStrPadding
!= 0) {
192 encodingStrPadding
= 4 - encodingStrPadding
;
194 newSelector
->encodingStrLength
= totalSize
+= encodingStrPadding
;
195 char* allStrings
= (char*) uprv_malloc(totalSize
);
197 *status
= U_MEMORY_ALLOCATION_ERROR
;
201 for (i
= 0; i
< converterListSize
; i
++) {
202 newSelector
->encodings
[i
] = allStrings
;
203 uprv_strcpy(newSelector
->encodings
[i
],
204 converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
));
205 allStrings
+= uprv_strlen(newSelector
->encodings
[i
]) + 1;
207 while (encodingStrPadding
> 0) {
209 --encodingStrPadding
;
212 newSelector
->ownEncodingStrings
= TRUE
;
213 newSelector
->encodingsCount
= converterListSize
;
214 UPropsVectors
*upvec
= upvec_open((converterListSize
+31)/32, status
);
215 generateSelectorData(newSelector
.getAlias(), upvec
, excludedCodePoints
, whichSet
, status
);
218 if (U_FAILURE(*status
)) {
222 return newSelector
.orphan();
225 /* close opened selector */
226 U_CAPI
void U_EXPORT2
227 ucnvsel_close(UConverterSelector
*sel
) {
231 if (sel
->ownEncodingStrings
) {
232 uprv_free(sel
->encodings
[0]);
234 uprv_free(sel
->encodings
);
238 utrie2_close(sel
->trie
);
239 uprv_free(sel
->swapped
);
243 static const UDataInfo dataInfo
= {
252 { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */
253 { 1, 0, 0, 0 }, /* formatVersion */
254 { 0, 0, 0, 0 } /* dataVersion */
/* Slots of the int32_t indexes[] array in the serialized form. */
enum {
  UCNVSEL_INDEX_TRIE_SIZE = 0,      // trie size in bytes
  UCNVSEL_INDEX_PV_COUNT = 1,       // number of uint32_t in the bit vectors
  UCNVSEL_INDEX_NAMES_COUNT = 2,    // number of encoding names
  UCNVSEL_INDEX_NAMES_LENGTH = 3,   // number of encoding name bytes including padding
  UCNVSEL_INDEX_SIZE = 15,          // bytes following the DataHeader
  UCNVSEL_INDEX_COUNT = 16          // total number of slots in indexes[]
};
267 * Serialized form of a UConverterSelector, formatVersion 1:
269 * The serialized form begins with a standard ICU DataHeader with a UDataInfo
270 * as the template above.
271 * This is followed by:
272 * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above
273 * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
274 * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors
275 * char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding
278 /* serialize a selector */
279 U_CAPI
int32_t U_EXPORT2
280 ucnvsel_serialize(const UConverterSelector
* sel
,
281 void* buffer
, int32_t bufferCapacity
, UErrorCode
* status
) {
282 // check if already failed
283 if (U_FAILURE(*status
)) {
286 // ensure args make sense!
287 uint8_t *p
= (uint8_t *)buffer
;
288 if (bufferCapacity
< 0 ||
289 (bufferCapacity
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
291 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
294 // add up the size of the serialized form
295 int32_t serializedTrieSize
= utrie2_serialize(sel
->trie
, NULL
, 0, status
);
296 if (*status
!= U_BUFFER_OVERFLOW_ERROR
&& U_FAILURE(*status
)) {
299 *status
= U_ZERO_ERROR
;
302 uprv_memset(&header
, 0, sizeof(header
));
303 header
.dataHeader
.headerSize
= (uint16_t)((sizeof(header
) + 15) & ~15);
304 header
.dataHeader
.magic1
= 0xda;
305 header
.dataHeader
.magic2
= 0x27;
306 uprv_memcpy(&header
.info
, &dataInfo
, sizeof(dataInfo
));
308 int32_t indexes
[UCNVSEL_INDEX_COUNT
] = {
312 sel
->encodingStrLength
316 header
.dataHeader
.headerSize
+
317 (int32_t)sizeof(indexes
) +
320 sel
->encodingStrLength
;
321 indexes
[UCNVSEL_INDEX_SIZE
] = totalSize
- header
.dataHeader
.headerSize
;
322 if (totalSize
> bufferCapacity
) {
323 *status
= U_BUFFER_OVERFLOW_ERROR
;
327 int32_t length
= header
.dataHeader
.headerSize
;
328 uprv_memcpy(p
, &header
, sizeof(header
));
329 uprv_memset(p
+ sizeof(header
), 0, length
- sizeof(header
));
332 length
= (int32_t)sizeof(indexes
);
333 uprv_memcpy(p
, indexes
, length
);
336 utrie2_serialize(sel
->trie
, p
, serializedTrieSize
, status
);
337 p
+= serializedTrieSize
;
339 length
= sel
->pvCount
* 4;
340 uprv_memcpy(p
, sel
->pv
, length
);
343 uprv_memcpy(p
, sel
->encodings
[0], sel
->encodingStrLength
);
344 p
+= sel
->encodingStrLength
;
350 * swap a selector into the desired Endianness and Asciiness of
351 * the system. Just as FYI, selectors are always saved in the format
352 * of the system that created them. They are only converted if used
353 * on another system. In other words, selectors created on different
354 * system can be different even if the params are identical (endianness
355 * and Asciiness differences only)
357 * @param ds pointer to data swapper containing swapping info
358 * @param inData pointer to incoming data
359 * @param length length of inData in bytes
360 * @param outData pointer to output data. Capacity should
361 * be at least equal to capacity of inData
362 * @param status an in/out ICU UErrorCode
363 * @return 0 on failure, number of bytes swapped on success
364 * number of bytes swapped can be smaller than length
367 ucnvsel_swap(const UDataSwapper
*ds
,
368 const void *inData
, int32_t length
,
369 void *outData
, UErrorCode
*status
) {
370 /* udata_swapDataHeader checks the arguments */
371 int32_t headerSize
= udata_swapDataHeader(ds
, inData
, length
, outData
, status
);
372 if(U_FAILURE(*status
)) {
376 /* check data format and format version */
377 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+ 4);
379 pInfo
->dataFormat
[0] == 0x43 && /* dataFormat="CSel" */
380 pInfo
->dataFormat
[1] == 0x53 &&
381 pInfo
->dataFormat
[2] == 0x65 &&
382 pInfo
->dataFormat
[3] == 0x6c
384 udata_printError(ds
, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
385 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
386 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3]);
387 *status
= U_INVALID_FORMAT_ERROR
;
390 if(pInfo
->formatVersion
[0] != 1) {
391 udata_printError(ds
, "ucnvsel_swap(): format version %02x is not supported\n",
392 pInfo
->formatVersion
[0]);
393 *status
= U_UNSUPPORTED_ERROR
;
398 length
-= headerSize
;
400 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
402 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
407 const uint8_t *inBytes
= (const uint8_t *)inData
+ headerSize
;
408 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
410 /* read the indexes */
411 const int32_t *inIndexes
= (const int32_t *)inBytes
;
414 for(i
= 0; i
< 16; ++i
) {
415 indexes
[i
] = udata_readInt32(ds
, inIndexes
[i
]);
418 /* get the total length of the data */
419 int32_t size
= indexes
[UCNVSEL_INDEX_SIZE
];
422 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
424 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
428 /* copy the data for inaccessible bytes */
429 if(inBytes
!= outBytes
) {
430 uprv_memcpy(outBytes
, inBytes
, size
);
433 int32_t offset
= 0, count
;
435 /* swap the int32_t indexes[] */
436 count
= UCNVSEL_INDEX_COUNT
*4;
437 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, status
);
440 /* swap the UTrie2 */
441 count
= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
442 utrie2_swap(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
445 /* swap the uint32_t pv[] */
446 count
= indexes
[UCNVSEL_INDEX_PV_COUNT
]*4;
447 ds
->swapArray32(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
450 /* swap the encoding names */
451 count
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
452 ds
->swapInvChars(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
455 U_ASSERT(offset
== size
);
458 return headerSize
+ size
;
461 /* unserialize a selector */
462 U_CAPI UConverterSelector
* U_EXPORT2
463 ucnvsel_openFromSerialized(const void* buffer
, int32_t length
, UErrorCode
* status
) {
464 // check if already failed
465 if (U_FAILURE(*status
)) {
468 // ensure args make sense!
469 const uint8_t *p
= (const uint8_t *)buffer
;
471 (length
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
473 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
478 // not even enough space for a minimal header
479 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
482 const DataHeader
*pHeader
= (const DataHeader
*)p
;
484 pHeader
->dataHeader
.magic1
==0xda &&
485 pHeader
->dataHeader
.magic2
==0x27 &&
486 pHeader
->info
.dataFormat
[0] == 0x43 &&
487 pHeader
->info
.dataFormat
[1] == 0x53 &&
488 pHeader
->info
.dataFormat
[2] == 0x65 &&
489 pHeader
->info
.dataFormat
[3] == 0x6c
491 /* header not valid or dataFormat not recognized */
492 *status
= U_INVALID_FORMAT_ERROR
;
495 if (pHeader
->info
.formatVersion
[0] != 1) {
496 *status
= U_UNSUPPORTED_ERROR
;
499 uint8_t* swapped
= NULL
;
500 if (pHeader
->info
.isBigEndian
!= U_IS_BIG_ENDIAN
||
501 pHeader
->info
.charsetFamily
!= U_CHARSET_FAMILY
505 udata_openSwapperForInputData(p
, length
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, status
);
506 int32_t totalSize
= ucnvsel_swap(ds
, p
, -1, NULL
, status
);
507 if (U_FAILURE(*status
)) {
508 udata_closeSwapper(ds
);
511 if (length
< totalSize
) {
512 udata_closeSwapper(ds
);
513 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
516 swapped
= (uint8_t*)uprv_malloc(totalSize
);
517 if (swapped
== NULL
) {
518 udata_closeSwapper(ds
);
519 *status
= U_MEMORY_ALLOCATION_ERROR
;
522 ucnvsel_swap(ds
, p
, length
, swapped
, status
);
523 udata_closeSwapper(ds
);
524 if (U_FAILURE(*status
)) {
529 pHeader
= (const DataHeader
*)p
;
531 if (length
< (pHeader
->dataHeader
.headerSize
+ 16 * 4)) {
532 // not even enough space for the header and the indexes
534 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
537 p
+= pHeader
->dataHeader
.headerSize
;
538 length
-= pHeader
->dataHeader
.headerSize
;
540 const int32_t *indexes
= (const int32_t *)p
;
541 if (length
< indexes
[UCNVSEL_INDEX_SIZE
]) {
543 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
546 p
+= UCNVSEL_INDEX_COUNT
* 4;
547 // create and populate the selector object
548 UConverterSelector
* sel
= (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
));
550 (char **)uprv_malloc(
551 indexes
[UCNVSEL_INDEX_NAMES_COUNT
] * sizeof(char *));
552 if (sel
== NULL
|| encodings
== NULL
) {
555 uprv_free(encodings
);
556 *status
= U_MEMORY_ALLOCATION_ERROR
;
559 uprv_memset(sel
, 0, sizeof(UConverterSelector
));
560 sel
->pvCount
= indexes
[UCNVSEL_INDEX_PV_COUNT
];
561 sel
->encodings
= encodings
;
562 sel
->encodingsCount
= indexes
[UCNVSEL_INDEX_NAMES_COUNT
];
563 sel
->encodingStrLength
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
564 sel
->swapped
= swapped
;
566 sel
->trie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
567 p
, indexes
[UCNVSEL_INDEX_TRIE_SIZE
], NULL
,
569 p
+= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
570 if (U_FAILURE(*status
)) {
575 sel
->pv
= (uint32_t *)p
;
576 p
+= sel
->pvCount
* 4;
579 for (int32_t i
= 0; i
< sel
->encodingsCount
; ++i
) {
580 sel
->encodings
[i
] = s
;
581 s
+= uprv_strlen(s
) + 1;
583 p
+= sel
->encodingStrLength
;
588 // a bunch of functions for the enumeration thingie! Nothing fancy here. Just
589 // iterate over the selected encodings
594 const UConverterSelector
* sel
;
599 static void U_CALLCONV
600 ucnvsel_close_selector_iterator(UEnumeration
*enumerator
) {
601 uprv_free(((Enumerator
*)(enumerator
->context
))->index
);
602 uprv_free(enumerator
->context
);
603 uprv_free(enumerator
);
607 static int32_t U_CALLCONV
608 ucnvsel_count_encodings(UEnumeration
*enumerator
, UErrorCode
*status
) {
609 // check if already failed
610 if (U_FAILURE(*status
)) {
613 return ((Enumerator
*)(enumerator
->context
))->length
;
617 static const char* U_CALLCONV
ucnvsel_next_encoding(UEnumeration
* enumerator
,
618 int32_t* resultLength
,
619 UErrorCode
* status
) {
620 // check if already failed
621 if (U_FAILURE(*status
)) {
625 int16_t cur
= ((Enumerator
*)(enumerator
->context
))->cur
;
626 const UConverterSelector
* sel
;
628 if (cur
>= ((Enumerator
*)(enumerator
->context
))->length
) {
631 sel
= ((Enumerator
*)(enumerator
->context
))->sel
;
632 result
= sel
->encodings
[((Enumerator
*)(enumerator
->context
))->index
[cur
] ];
633 ((Enumerator
*)(enumerator
->context
))->cur
++;
635 *resultLength
= (int32_t)uprv_strlen(result
);
640 static void U_CALLCONV
ucnvsel_reset_iterator(UEnumeration
* enumerator
,
641 UErrorCode
* status
) {
642 // check if already failed
643 if (U_FAILURE(*status
)) {
646 ((Enumerator
*)(enumerator
->context
))->cur
= 0;
652 static const UEnumeration defaultEncodings
= {
655 ucnvsel_close_selector_iterator
,
656 ucnvsel_count_encodings
,
658 ucnvsel_next_encoding
,
659 ucnvsel_reset_iterator
663 // internal fn to intersect two sets of masks
664 // returns whether the mask has reduced to all zeros
665 static UBool
intersectMasks(uint32_t* dest
, const uint32_t* source1
, int32_t len
) {
667 uint32_t oredDest
= 0;
668 for (i
= 0 ; i
< len
; ++i
) {
669 oredDest
|= (dest
[i
] &= source1
[i
]);
671 return oredDest
== 0;
// Internal: counts the 1 bits in the len words of mask.
// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
static int16_t countOnes(uint32_t* mask, int32_t len) {
  int32_t bitsSet = 0;
  for (int32_t word = 0; word < len; ++word) {
    uint32_t remaining = mask[word];
    while (remaining != 0) {
      remaining &= remaining - 1; // clear the least significant bit set
      ++bitsSet;
    }
  }
  return (int16_t)bitsSet;
}
689 /* internal function! */
690 static UEnumeration
*selectForMask(const UConverterSelector
* sel
,
691 uint32_t *mask
, UErrorCode
*status
) {
692 // this is the context we will use. Store a table of indices to which
693 // encodings are legit.
694 struct Enumerator
* result
= (Enumerator
*)uprv_malloc(sizeof(Enumerator
));
695 if (result
== NULL
) {
697 *status
= U_MEMORY_ALLOCATION_ERROR
;
700 result
->index
= NULL
; // this will be allocated later!
701 result
->length
= result
->cur
= 0;
704 UEnumeration
*en
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
706 // TODO(markus): Combine Enumerator and UEnumeration into one struct.
709 *status
= U_MEMORY_ALLOCATION_ERROR
;
712 memcpy(en
, &defaultEncodings
, sizeof(UEnumeration
));
713 en
->context
= result
;
715 int32_t columns
= (sel
->encodingsCount
+31)/32;
716 int16_t numOnes
= countOnes(mask
, columns
);
717 // now, we know the exact space we need for index
719 result
->index
= (int16_t*) uprv_malloc(numOnes
* sizeof(int16_t));
723 for (j
= 0 ; j
< columns
; j
++) {
724 uint32_t v
= mask
[j
];
725 for (i
= 0 ; i
< 32 && k
< sel
->encodingsCount
; i
++, k
++) {
727 result
->index
[result
->length
++] = k
;
732 } //otherwise, index will remain NULL (and will never be touched by
733 //the enumerator code anyway)
738 /* check a string against the selector - UTF16 version */
739 U_CAPI UEnumeration
* U_EXPORT2
740 ucnvsel_selectForString(const UConverterSelector
* sel
,
741 const UChar
*s
, int32_t length
, UErrorCode
*status
) {
742 // check if already failed
743 if (U_FAILURE(*status
)) {
746 // ensure args make sense!
747 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
748 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
752 int32_t columns
= (sel
->encodingsCount
+31)/32;
753 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
755 *status
= U_MEMORY_ALLOCATION_ERROR
;
758 uprv_memset(mask
, ~0, columns
*4);
768 while (limit
== NULL
? *s
!= 0 : s
!= limit
) {
771 UTRIE2_U16_NEXT16(sel
->trie
, s
, limit
, c
, pvIndex
);
772 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
777 return selectForMask(sel
, mask
, status
);
780 /* check a string against the selector - UTF8 version */
781 U_CAPI UEnumeration
* U_EXPORT2
782 ucnvsel_selectForUTF8(const UConverterSelector
* sel
,
783 const char *s
, int32_t length
, UErrorCode
*status
) {
784 // check if already failed
785 if (U_FAILURE(*status
)) {
788 // ensure args make sense!
789 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
790 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
794 int32_t columns
= (sel
->encodingsCount
+31)/32;
795 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
797 *status
= U_MEMORY_ALLOCATION_ERROR
;
800 uprv_memset(mask
, ~0, columns
*4);
803 length
= (int32_t)uprv_strlen(s
);
807 const char *limit
= s
+ length
;
811 UTRIE2_U8_NEXT16(sel
->trie
, s
, limit
, pvIndex
);
812 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
817 return selectForMask(sel
, mask
, status
);
820 #endif // !UCONFIG_NO_CONVERSION