1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2008-2011, International Business Machines
7 * Corporation, Google and others. All Rights Reserved.
9 *******************************************************************************
11 // Author : eldawy@google.com (Mohamed Eldawy)
14 // Purpose: To generate a list of encodings capable of handling
15 // a given Unicode text
17 // Started 09-April-2008
22 * This is an implementation of an encoding selector.
23 * The goal is, given a unicode string, find the encodings
24 * this string can be mapped to. To make processing faster
25 * a trie is built when you call ucnvsel_open() that
26 * stores all encodings a codepoint can map to
29 #include "unicode/ucnvsel.h"
31 #if !UCONFIG_NO_CONVERSION
35 #include "unicode/uchar.h"
36 #include "unicode/uniset.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ustring.h"
39 #include "unicode/uchriter.h"
// State of a converter selector: a trie whose values are offsets into the
// bit table pv, plus the list of encoding names the selector was built for.
// NOTE(review): other code in this file also reads sel->pvCount and
// sel->swapped; their declarations are not among the lines visible here.
50 struct UConverterSelector
{
51 UTrie2
*trie
; // 16 bit trie containing offsets into pv
52 uint32_t* pv
; // table of bits!
54 char** encodings
; // which encodings did user ask to use?
55 int32_t encodingsCount
;
// total bytes of all encoding names, including NULs and the 4-byte padding
56 int32_t encodingStrLength
;
// ownership flags: ucnvsel_close() frees encodings[0] only when
// ownEncodingStrings is set
58 UBool ownPv
, ownEncodingStrings
;
// Populates upvec with the encoding bit sets and then builds result->trie
// and result->pv from it.
// For every encoding in result->encodings a converter is opened, its Unicode
// set is queried with whichSet, and each code point range of that set is
// passed to upvec_setValue(). Ranges from excludedCodePoints (when non-NULL)
// get every column set to all ones.
// On success result->pvCount holds rows * columns and result->ownPv is TRUE.
// NOTE(review): the parameter lines for upvec and status, and the lines that
// compute column and mask, are not visible here — confirm against the full file.
61 static void generateSelectorData(UConverterSelector
* result
,
63 const USet
* excludedCodePoints
,
64 const UConverterUnicodeSet whichSet
,
66 if (U_FAILURE(*status
)) {
// one uint32_t column holds the bits for 32 encodings
70 int32_t columns
= (result
->encodingsCount
+31)/32;
72 // set errorValue to all-ones
73 for (int32_t col
= 0; col
< columns
; col
++) {
74 upvec_setValue(upvec
, UPVEC_ERROR_VALUE_CP
, UPVEC_ERROR_VALUE_CP
,
78 for (int32_t i
= 0; i
< result
->encodingsCount
; ++i
) {
83 UConverter
* test_converter
= ucnv_open(result
->encodings
[i
], status
);
84 if (U_FAILURE(*status
)) {
87 USet
* unicode_point_set
;
88 unicode_point_set
= uset_open(1, 0); // empty set
90 ucnv_getUnicodeSet(test_converter
, unicode_point_set
,
// NOTE(review): only ucnv_close() is visible on this failure path; confirm
// that unicode_point_set is released as well before returning.
92 if (U_FAILURE(*status
)) {
93 ucnv_close(test_converter
);
99 // now iterate over intervals on set i!
100 item_count
= uset_getItemCount(unicode_point_set
);
102 for (j
= 0; j
< item_count
; ++j
) {
// a separate status is used so that a failing item does not abort the build
105 UErrorCode smallStatus
= U_ZERO_ERROR
;
106 uset_getItem(unicode_point_set
, j
, &start_char
, &end_char
, NULL
, 0,
108 if (U_FAILURE(smallStatus
)) {
109 // this will be reached for the converters that fill the set with
110 // strings. Those should be ignored by our system
112 upvec_setValue(upvec
, start_char
, end_char
, column
, ~0, mask
,
116 ucnv_close(test_converter
);
117 uset_close(unicode_point_set
);
118 if (U_FAILURE(*status
)) {
123 // handle excluded encodings! Simply set their values to all 1's in the upvec
124 if (excludedCodePoints
) {
125 int32_t item_count
= uset_getItemCount(excludedCodePoints
);
126 for (int32_t j
= 0; j
< item_count
; ++j
) {
130 uset_getItem(excludedCodePoints
, j
, &start_char
, &end_char
, NULL
, 0,
132 for (int32_t col
= 0; col
< columns
; col
++) {
133 upvec_setValue(upvec
, start_char
, end_char
, col
, ~0, ~0,
139 // alright. Now, let's put things in the same exact form you'd get when you
140 // unserialize things.
141 result
->trie
= upvec_compactToUTrie2WithRowIndexes(upvec
, status
);
142 result
->pv
= upvec_cloneArray(upvec
, &result
->pvCount
, NULL
, status
);
143 result
->pvCount
*= columns
; // number of uint32_t = rows * columns
144 result
->ownPv
= TRUE
;
// Builds a selector for a list of converter names.
// All names are copied into one allocation whose size is rounded up to a
// multiple of 4; encodings[i] points at the i-th copy. The bit table and trie
// are then produced by generateSelectorData().
// Sets U_ILLEGAL_ARGUMENT_ERROR for a negative converterListSize or a NULL
// list with a non-zero size, and U_MEMORY_ALLOCATION_ERROR when an
// allocation fails. On success the selector is handed to the caller via
// newSelector.orphan().
147 /* open a selector. If converterListSize is 0, build for all converters.
148 If excludedCodePoints is NULL, don't exclude any codepoints */
149 U_CAPI UConverterSelector
* U_EXPORT2
150 ucnvsel_open(const char* const* converterList
, int32_t converterListSize
,
151 const USet
* excludedCodePoints
,
152 const UConverterUnicodeSet whichSet
, UErrorCode
* status
) {
153 // check if already failed
154 if (U_FAILURE(*status
)) {
157 // ensure args make sense!
158 if (converterListSize
< 0 || (converterList
== NULL
&& converterListSize
!= 0)) {
159 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
163 // allocate a new converter
164 LocalUConverterSelectorPointer
newSelector(
165 (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
)));
166 if (newSelector
.isNull()) {
167 *status
= U_MEMORY_ALLOCATION_ERROR
;
170 uprv_memset(newSelector
.getAlias(), 0, sizeof(UConverterSelector
));
// size 0 means "all available converters": names then come from
// ucnv_getAvailableName(i) instead of converterList[i]
172 if (converterListSize
== 0) {
173 converterList
= NULL
;
174 converterListSize
= ucnv_countAvailable();
176 newSelector
->encodings
=
177 (char**)uprv_malloc(converterListSize
* sizeof(char*));
178 if (!newSelector
->encodings
) {
179 *status
= U_MEMORY_ALLOCATION_ERROR
;
// NOTE(review): encodings[0] is written unconditionally; if
// ucnv_countAvailable() can return 0 this stores into a zero-sized
// allocation — confirm.
182 newSelector
->encodings
[0] = NULL
; // now we can call ucnvsel_close()
184 // make a backup copy of the list of converters
185 int32_t totalSize
= 0;
187 for (i
= 0; i
< converterListSize
; i
++) {
189 (int32_t)uprv_strlen(converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
)) + 1;
191 // 4-align the totalSize to 4-align the size of the serialized form
192 int32_t encodingStrPadding
= totalSize
& 3;
193 if (encodingStrPadding
!= 0) {
194 encodingStrPadding
= 4 - encodingStrPadding
;
196 newSelector
->encodingStrLength
= totalSize
+= encodingStrPadding
;
197 char* allStrings
= (char*) uprv_malloc(totalSize
);
199 *status
= U_MEMORY_ALLOCATION_ERROR
;
// copy each name and advance allStrings past its terminating NUL
203 for (i
= 0; i
< converterListSize
; i
++) {
204 newSelector
->encodings
[i
] = allStrings
;
205 uprv_strcpy(newSelector
->encodings
[i
],
206 converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
));
207 allStrings
+= uprv_strlen(newSelector
->encodings
[i
]) + 1;
209 while (encodingStrPadding
> 0) {
211 --encodingStrPadding
;
214 newSelector
->ownEncodingStrings
= TRUE
;
215 newSelector
->encodingsCount
= converterListSize
;
// NOTE(review): the matching release of upvec is not among the visible lines.
216 UPropsVectors
*upvec
= upvec_open((converterListSize
+31)/32, status
);
217 generateSelectorData(newSelector
.getAlias(), upvec
, excludedCodePoints
, whichSet
, status
);
220 if (U_FAILURE(*status
)) {
224 return newSelector
.orphan();
// Releases a selector: the shared name buffer (encodings[0]) when the
// selector owns it, the encodings pointer array, the trie, and the swapped
// copy of the serialized data.
// NOTE(review): any NULL check on sel and the handling of sel->pv are not
// visible in the lines shown here.
227 /* close opened selector */
228 U_CAPI
void U_EXPORT2
229 ucnvsel_close(UConverterSelector
*sel
) {
// all names live in one allocation that starts at encodings[0]
233 if (sel
->ownEncodingStrings
) {
234 uprv_free(sel
->encodings
[0]);
236 uprv_free(sel
->encodings
);
240 utrie2_close(sel
->trie
);
241 uprv_free(sel
->swapped
);
// UDataInfo template that ucnvsel_serialize() copies into the header of the
// serialized form; ucnvsel_swap() and ucnvsel_openFromSerialized() check the
// same "CSel" dataFormat bytes and formatVersion[0] == 1.
// NOTE(review): the leading UDataInfo fields are not visible here.
245 static const UDataInfo dataInfo
= {
254 { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */
255 { 1, 0, 0, 0 }, /* formatVersion */
256 { 0, 0, 0, 0 } /* dataVersion */
// Positions within the int32_t indexes[] array that follows the data header
// in the serialized form. The array always has UCNVSEL_INDEX_COUNT entries;
// slot UCNVSEL_INDEX_SIZE is set by ucnvsel_serialize() to the total size
// minus the header size.
260 UCNVSEL_INDEX_TRIE_SIZE
, // trie size in bytes
261 UCNVSEL_INDEX_PV_COUNT
, // number of uint32_t in the bit vectors
262 UCNVSEL_INDEX_NAMES_COUNT
, // number of encoding names
263 UCNVSEL_INDEX_NAMES_LENGTH
, // number of encoding name bytes including padding
264 UCNVSEL_INDEX_SIZE
= 15, // bytes following the DataHeader
265 UCNVSEL_INDEX_COUNT
= 16
269 * Serialized form of a UConverterSelector, formatVersion 1:
271 * The serialized form begins with a standard ICU DataHeader with a UDataInfo
272 * as the template above.
273 * This is followed by:
274 * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above
275 * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
276 * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors
277 * char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding
// Writes the selector into buffer as: data header (padded to a multiple of
// 16 bytes), indexes[], serialized trie, pv bit vectors, encoding names.
// buffer must be 4-byte aligned when bufferCapacity > 0; otherwise
// U_ILLEGAL_ARGUMENT_ERROR is set. When the computed total size exceeds
// bufferCapacity, U_BUFFER_OVERFLOW_ERROR is set.
// NOTE(review): the return statements are not visible here — presumably the
// total size is returned; confirm.
280 /* serialize a selector */
281 U_CAPI
int32_t U_EXPORT2
282 ucnvsel_serialize(const UConverterSelector
* sel
,
283 void* buffer
, int32_t bufferCapacity
, UErrorCode
* status
) {
284 // check if already failed
285 if (U_FAILURE(*status
)) {
288 // ensure args make sense!
289 uint8_t *p
= (uint8_t *)buffer
;
290 if (bufferCapacity
< 0 ||
291 (bufferCapacity
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
293 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
296 // add up the size of the serialized form
// preflight call with a NULL buffer: a buffer-overflow status is expected
// here and is reset below
297 int32_t serializedTrieSize
= utrie2_serialize(sel
->trie
, NULL
, 0, status
);
298 if (*status
!= U_BUFFER_OVERFLOW_ERROR
&& U_FAILURE(*status
)) {
301 *status
= U_ZERO_ERROR
;
304 uprv_memset(&header
, 0, sizeof(header
));
// round the header size up to the next multiple of 16
305 header
.dataHeader
.headerSize
= (uint16_t)((sizeof(header
) + 15) & ~15);
306 header
.dataHeader
.magic1
= 0xda;
307 header
.dataHeader
.magic2
= 0x27;
308 uprv_memcpy(&header
.info
, &dataInfo
, sizeof(dataInfo
));
310 int32_t indexes
[UCNVSEL_INDEX_COUNT
] = {
314 sel
->encodingStrLength
318 header
.dataHeader
.headerSize
+
319 (int32_t)sizeof(indexes
) +
322 sel
->encodingStrLength
;
323 indexes
[UCNVSEL_INDEX_SIZE
] = totalSize
- header
.dataHeader
.headerSize
;
324 if (totalSize
> bufferCapacity
) {
325 *status
= U_BUFFER_OVERFLOW_ERROR
;
// header, followed by zero fill up to the padded header size
329 int32_t length
= header
.dataHeader
.headerSize
;
330 uprv_memcpy(p
, &header
, sizeof(header
));
331 uprv_memset(p
+ sizeof(header
), 0, length
- sizeof(header
));
334 length
= (int32_t)sizeof(indexes
);
335 uprv_memcpy(p
, indexes
, length
);
338 utrie2_serialize(sel
->trie
, p
, serializedTrieSize
, status
);
339 p
+= serializedTrieSize
;
// pvCount counts uint32_t units, hence the factor of 4 for bytes
341 length
= sel
->pvCount
* 4;
342 uprv_memcpy(p
, sel
->pv
, length
);
// the names are one contiguous buffer starting at encodings[0]
345 uprv_memcpy(p
, sel
->encodings
[0], sel
->encodingStrLength
);
346 p
+= sel
->encodingStrLength
;
352 * swap a selector into the desired Endianness and Asciiness of
353 * the system. Just as FYI, selectors are always saved in the format
354 * of the system that created them. They are only converted if used
355 * on another system. In other words, selectors created on different
356 * system can be different even if the params are identical (endianness
357 * and Asciiness differences only)
359 * @param ds pointer to data swapper containing swapping info
360 * @param inData pointer to incoming data
361 * @param length length of inData in bytes
362 * @param outData pointer to output data. Capacity should
363 * be at least equal to capacity of inData
364 * @param status an in/out ICU UErrorCode
365 * @return 0 on failure, number of bytes swapped on success
366 * number of bytes swapped can be smaller than length
// Swaps serialized selector data: after the data header has been handled by
// udata_swapDataHeader(), checks for dataFormat "CSel" and format version 1,
// reads the 16 indexes, then swaps in order the indexes, the UTrie2, the pv
// array and the encoding names. Returns headerSize + size on the path that
// reaches the final return.
// NOTE(review): the conditions guarding the two "too few bytes" errors and
// the updates of offset between the swap steps are not visible here.
369 ucnvsel_swap(const UDataSwapper
*ds
,
370 const void *inData
, int32_t length
,
371 void *outData
, UErrorCode
*status
) {
372 /* udata_swapDataHeader checks the arguments */
373 int32_t headerSize
= udata_swapDataHeader(ds
, inData
, length
, outData
, status
);
374 if(U_FAILURE(*status
)) {
378 /* check data format and format version */
379 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+ 4);
381 pInfo
->dataFormat
[0] == 0x43 && /* dataFormat="CSel" */
382 pInfo
->dataFormat
[1] == 0x53 &&
383 pInfo
->dataFormat
[2] == 0x65 &&
384 pInfo
->dataFormat
[3] == 0x6c
386 udata_printError(ds
, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
387 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
388 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3]);
389 *status
= U_INVALID_FORMAT_ERROR
;
392 if(pInfo
->formatVersion
[0] != 1) {
393 udata_printError(ds
, "ucnvsel_swap(): format version %02x is not supported\n",
394 pInfo
->formatVersion
[0]);
395 *status
= U_UNSUPPORTED_ERROR
;
// from here on length counts only the bytes after the header
400 length
-= headerSize
;
402 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
404 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
409 const uint8_t *inBytes
= (const uint8_t *)inData
+ headerSize
;
410 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
412 /* read the indexes */
413 const int32_t *inIndexes
= (const int32_t *)inBytes
;
416 for(i
= 0; i
< 16; ++i
) {
417 indexes
[i
] = udata_readInt32(ds
, inIndexes
[i
]);
420 /* get the total length of the data */
421 int32_t size
= indexes
[UCNVSEL_INDEX_SIZE
];
424 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
426 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
430 /* copy the data for inaccessible bytes */
431 if(inBytes
!= outBytes
) {
432 uprv_memcpy(outBytes
, inBytes
, size
);
435 int32_t offset
= 0, count
;
437 /* swap the int32_t indexes[] */
438 count
= UCNVSEL_INDEX_COUNT
*4;
439 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, status
);
442 /* swap the UTrie2 */
443 count
= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
444 utrie2_swap(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
447 /* swap the uint32_t pv[] */
448 count
= indexes
[UCNVSEL_INDEX_PV_COUNT
]*4;
449 ds
->swapArray32(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
452 /* swap the encoding names */
453 count
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
454 ds
->swapInvChars(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
// every byte announced by indexes[UCNVSEL_INDEX_SIZE] must have been visited
457 U_ASSERT(offset
== size
);
460 return headerSize
+ size
;
// Creates a selector from a serialized buffer.
// The buffer must be 4-byte aligned and start with a data header carrying
// the magic bytes 0xda 0x27, dataFormat "CSel" and format version 1.
// When the data's endianness or charset family differs from the platform's,
// it is first swapped into a newly allocated copy that is stored in
// sel->swapped. The selector's pv pointer and encodings[] entries are set to
// addresses inside the data being read (no copy of pv or the names is made
// in the visible lines).
// Errors set: U_ILLEGAL_ARGUMENT_ERROR, U_INDEX_OUTOFBOUNDS_ERROR,
// U_INVALID_FORMAT_ERROR, U_UNSUPPORTED_ERROR, U_MEMORY_ALLOCATION_ERROR.
// NOTE(review): apart from the comparison of length against
// indexes[UCNVSEL_INDEX_SIZE], no range checks on the individual index
// values (counts and sizes read from the buffer) are visible — confirm.
463 /* unserialize a selector */
464 U_CAPI UConverterSelector
* U_EXPORT2
465 ucnvsel_openFromSerialized(const void* buffer
, int32_t length
, UErrorCode
* status
) {
466 // check if already failed
467 if (U_FAILURE(*status
)) {
470 // ensure args make sense!
471 const uint8_t *p
= (const uint8_t *)buffer
;
473 (length
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
475 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
480 // not even enough space for a minimal header
481 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
484 const DataHeader
*pHeader
= (const DataHeader
*)p
;
486 pHeader
->dataHeader
.magic1
==0xda &&
487 pHeader
->dataHeader
.magic2
==0x27 &&
488 pHeader
->info
.dataFormat
[0] == 0x43 &&
489 pHeader
->info
.dataFormat
[1] == 0x53 &&
490 pHeader
->info
.dataFormat
[2] == 0x65 &&
491 pHeader
->info
.dataFormat
[3] == 0x6c
493 /* header not valid or dataFormat not recognized */
494 *status
= U_INVALID_FORMAT_ERROR
;
497 if (pHeader
->info
.formatVersion
[0] != 1) {
498 *status
= U_UNSUPPORTED_ERROR
;
501 uint8_t* swapped
= NULL
;
// data written on a platform with different byte order or charset family
// has to be converted before it can be used
502 if (pHeader
->info
.isBigEndian
!= U_IS_BIG_ENDIAN
||
503 pHeader
->info
.charsetFamily
!= U_CHARSET_FAMILY
507 udata_openSwapperForInputData(p
, length
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, status
);
// first call with length -1 and no output buffer only determines the size
508 int32_t totalSize
= ucnvsel_swap(ds
, p
, -1, NULL
, status
);
509 if (U_FAILURE(*status
)) {
510 udata_closeSwapper(ds
);
513 if (length
< totalSize
) {
514 udata_closeSwapper(ds
);
515 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
518 swapped
= (uint8_t*)uprv_malloc(totalSize
);
519 if (swapped
== NULL
) {
520 udata_closeSwapper(ds
);
521 *status
= U_MEMORY_ALLOCATION_ERROR
;
524 ucnvsel_swap(ds
, p
, length
, swapped
, status
);
525 udata_closeSwapper(ds
);
526 if (U_FAILURE(*status
)) {
531 pHeader
= (const DataHeader
*)p
;
533 if (length
< (pHeader
->dataHeader
.headerSize
+ 16 * 4)) {
534 // not even enough space for the header and the indexes
536 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
539 p
+= pHeader
->dataHeader
.headerSize
;
540 length
-= pHeader
->dataHeader
.headerSize
;
542 const int32_t *indexes
= (const int32_t *)p
;
543 if (length
< indexes
[UCNVSEL_INDEX_SIZE
]) {
545 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
548 p
+= UCNVSEL_INDEX_COUNT
* 4;
549 // create and populate the selector object
550 UConverterSelector
* sel
= (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
));
552 (char **)uprv_malloc(
553 indexes
[UCNVSEL_INDEX_NAMES_COUNT
] * sizeof(char *));
554 if (sel
== NULL
|| encodings
== NULL
) {
557 uprv_free(encodings
);
558 *status
= U_MEMORY_ALLOCATION_ERROR
;
561 uprv_memset(sel
, 0, sizeof(UConverterSelector
));
562 sel
->pvCount
= indexes
[UCNVSEL_INDEX_PV_COUNT
];
563 sel
->encodings
= encodings
;
564 sel
->encodingsCount
= indexes
[UCNVSEL_INDEX_NAMES_COUNT
];
565 sel
->encodingStrLength
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
566 sel
->swapped
= swapped
;
568 sel
->trie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
569 p
, indexes
[UCNVSEL_INDEX_TRIE_SIZE
], NULL
,
571 p
+= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
572 if (U_FAILURE(*status
)) {
// pv is used in place, directly from the data at p
577 sel
->pv
= (uint32_t *)p
;
578 p
+= sel
->pvCount
* 4;
// walk the NUL-terminated names one after another
581 for (int32_t i
= 0; i
< sel
->encodingsCount
; ++i
) {
582 sel
->encodings
[i
] = s
;
583 s
+= uprv_strlen(s
) + 1;
585 p
+= sel
->encodingStrLength
;
590 // a bunch of functions for the enumeration thingie! Nothing fancy here. Just
591 // iterate over the selected encodings
596 const UConverterSelector
* sel
;
// UEnumeration close callback: frees the index array held by the Enumerator
// context, then the context itself, then the UEnumeration.
601 static void U_CALLCONV
602 ucnvsel_close_selector_iterator(UEnumeration
*enumerator
) {
603 uprv_free(((Enumerator
*)(enumerator
->context
))->index
);
604 uprv_free(enumerator
->context
);
605 uprv_free(enumerator
);
// UEnumeration count callback: returns the length field of the Enumerator
// context. The value returned when *status is already a failure is not
// visible in the lines shown here.
609 static int32_t U_CALLCONV
610 ucnvsel_count_encodings(UEnumeration
*enumerator
, UErrorCode
*status
) {
611 // check if already failed
612 if (U_FAILURE(*status
)) {
615 return ((Enumerator
*)(enumerator
->context
))->length
;
// UEnumeration next callback: looks up the encoding name at the current
// position (sel->encodings[index[cur]]), advances cur, and stores the name's
// strlen in *resultLength.
// NOTE(review): the bodies of the early exits (failed status, cur >= length)
// and the final return are not visible here; *resultLength is written
// without a visible NULL check — confirm.
619 static const char* U_CALLCONV
ucnvsel_next_encoding(UEnumeration
* enumerator
,
620 int32_t* resultLength
,
621 UErrorCode
* status
) {
622 // check if already failed
623 if (U_FAILURE(*status
)) {
627 int16_t cur
= ((Enumerator
*)(enumerator
->context
))->cur
;
628 const UConverterSelector
* sel
;
// past the last selected encoding
630 if (cur
>= ((Enumerator
*)(enumerator
->context
))->length
) {
633 sel
= ((Enumerator
*)(enumerator
->context
))->sel
;
// index[] maps the enumeration position to a slot in sel->encodings
634 result
= sel
->encodings
[((Enumerator
*)(enumerator
->context
))->index
[cur
] ];
635 ((Enumerator
*)(enumerator
->context
))->cur
++;
637 *resultLength
= (int32_t)uprv_strlen(result
);
// UEnumeration reset callback: rewinds the Enumerator context to position 0.
642 static void U_CALLCONV
ucnvsel_reset_iterator(UEnumeration
* enumerator
,
643 UErrorCode
* status
) {
644 // check if already failed
645 if (U_FAILURE(*status
)) {
648 ((Enumerator
*)(enumerator
->context
))->cur
= 0;
// Template UEnumeration wired to the callbacks in this file; selectForMask()
// copies it with memcpy and then sets the context pointer.
// NOTE(review): the remaining initializer entries are not visible here.
654 static const UEnumeration defaultEncodings
= {
657 ucnvsel_close_selector_iterator
,
658 ucnvsel_count_encodings
,
660 ucnvsel_next_encoding
,
661 ucnvsel_reset_iterator
665 // internal fn to intersect two sets of masks
666 // returns whether the mask has reduced to all zeros
// dest is modified in place: each of its len words is ANDed with the
// corresponding word of source1.
667 static UBool
intersectMasks(uint32_t* dest
, const uint32_t* source1
, int32_t len
) {
// OR of all result words; stays 0 only if every word became 0
669 uint32_t oredDest
= 0;
670 for (i
= 0 ; i
< len
; ++i
) {
671 oredDest
|= (dest
[i
] &= source1
[i
]);
673 return oredDest
== 0;
// Internal helper: counts how many 1 bits are set in the first len words of
// mask. Returns 0 when len is 0 or negative.
// The bit-counting step is taken from
// http://graphics.stanford.edu/~seander/bithacks.html
static int16_t countOnes(const uint32_t* mask, int32_t len) {
  int32_t totalOnes = 0;
  for (int32_t i = 0; i < len; ++i) {
    // "ent &= ent - 1" clears the least significant set bit, so the inner
    // loop runs exactly once per 1 bit in the word
    for (uint32_t ent = mask[i]; ent != 0; ent &= ent - 1) {
      ++totalOnes;
    }
  }
  // the declared return type is int16_t; make the narrowing explicit
  return (int16_t)totalOnes;
}
// Builds a UEnumeration over the encodings whose bit is set in mask.
// Allocates an Enumerator context and a UEnumeration (a copy of
// defaultEncodings), counts the set bits, allocates an index array of that
// many int16_t entries and fills it with the positions k of the selected
// encodings.
// Sets U_MEMORY_ALLOCATION_ERROR when the context or enumeration allocation
// fails.
// NOTE(review): the per-bit test inside the inner loop, the check of the
// index allocation, the assignment of result->sel, and what happens to mask
// afterwards are not visible in the lines shown here.
691 /* internal function! */
692 static UEnumeration
*selectForMask(const UConverterSelector
* sel
,
693 uint32_t *mask
, UErrorCode
*status
) {
694 // this is the context we will use. Store a table of indices to which
695 // encodings are legit.
696 struct Enumerator
* result
= (Enumerator
*)uprv_malloc(sizeof(Enumerator
));
697 if (result
== NULL
) {
699 *status
= U_MEMORY_ALLOCATION_ERROR
;
702 result
->index
= NULL
; // this will be allocated later!
703 result
->length
= result
->cur
= 0;
706 UEnumeration
*en
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
708 // TODO(markus): Combine Enumerator and UEnumeration into one struct.
711 *status
= U_MEMORY_ALLOCATION_ERROR
;
714 memcpy(en
, &defaultEncodings
, sizeof(UEnumeration
));
715 en
->context
= result
;
// one uint32_t mask word per 32 encodings
717 int32_t columns
= (sel
->encodingsCount
+31)/32;
718 int16_t numOnes
= countOnes(mask
, columns
);
719 // now, we know the exact space we need for index
721 result
->index
= (int16_t*) uprv_malloc(numOnes
* sizeof(int16_t));
725 for (j
= 0 ; j
< columns
; j
++) {
726 uint32_t v
= mask
[j
];
// k is the running encoding number; it stops at encodingsCount so the
// unused high bits of the last word are not visited
727 for (i
= 0 ; i
< 32 && k
< sel
->encodingsCount
; i
++, k
++) {
729 result
->index
[result
->length
++] = k
;
734 } //otherwise, index will remain NULL (and will never be touched by
735 //the enumerator code anyway)
// Selects the encodings that can represent every code point of a UTF-16
// string. Starts from a mask with all bits set and intersects it with the
// pv row of each code point (looked up through the trie); the resulting mask
// is turned into an enumeration by selectForMask().
// A NULL limit means the string is NUL-terminated: the loop then runs until
// *s is 0.
// Sets U_ILLEGAL_ARGUMENT_ERROR when sel is NULL or when s is NULL with a
// non-zero length, and U_MEMORY_ALLOCATION_ERROR when the mask cannot be
// allocated.
// NOTE(review): how limit is derived from length, and the statement executed
// when intersectMasks() reports an all-zero mask, are not visible here.
740 /* check a string against the selector - UTF16 version */
741 U_CAPI UEnumeration
* U_EXPORT2
742 ucnvsel_selectForString(const UConverterSelector
* sel
,
743 const UChar
*s
, int32_t length
, UErrorCode
*status
) {
744 // check if already failed
745 if (U_FAILURE(*status
)) {
748 // ensure args make sense!
749 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
750 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
754 int32_t columns
= (sel
->encodingsCount
+31)/32;
755 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
757 *status
= U_MEMORY_ALLOCATION_ERROR
;
// start with every encoding selected
760 uprv_memset(mask
, ~0, columns
*4);
770 while (limit
== NULL
? *s
!= 0 : s
!= limit
) {
// the macro advances s and yields the trie value for the code point in pvIndex
773 UTRIE2_U16_NEXT16(sel
->trie
, s
, limit
, c
, pvIndex
);
774 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
779 return selectForMask(sel
, mask
, status
);
// Selects the encodings that can represent every code point of a UTF-8
// string. Starts from a mask with all bits set and intersects it with the
// pv row of each code point (looked up through the trie); the resulting mask
// is turned into an enumeration by selectForMask().
// Sets U_ILLEGAL_ARGUMENT_ERROR when sel is NULL or when s is NULL with a
// non-zero length, and U_MEMORY_ALLOCATION_ERROR when the mask cannot be
// allocated.
// NOTE(review): the condition under which length is recomputed with
// uprv_strlen(s), the loop header, and the statement executed when
// intersectMasks() reports an all-zero mask are not visible here.
782 /* check a string against the selector - UTF8 version */
783 U_CAPI UEnumeration
* U_EXPORT2
784 ucnvsel_selectForUTF8(const UConverterSelector
* sel
,
785 const char *s
, int32_t length
, UErrorCode
*status
) {
786 // check if already failed
787 if (U_FAILURE(*status
)) {
790 // ensure args make sense!
791 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
792 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
796 int32_t columns
= (sel
->encodingsCount
+31)/32;
797 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
799 *status
= U_MEMORY_ALLOCATION_ERROR
;
// start with every encoding selected
802 uprv_memset(mask
, ~0, columns
*4);
805 length
= (int32_t)uprv_strlen(s
);
809 const char *limit
= s
+ length
;
// the macro advances s and yields the trie value for the code point in pvIndex
813 UTRIE2_U8_NEXT16(sel
->trie
, s
, limit
, pvIndex
);
814 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
819 return selectForMask(sel
, mask
, status
);
822 #endif // !UCONFIG_NO_CONVERSION