// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2008-2011, International Business Machines
*   Corporation, Google and others.  All Rights Reserved.
*
*******************************************************************************
*/
// Author : eldawy@google.com (Mohamed Eldawy)
//
// Purpose: To generate a list of encodings capable of handling
// a given Unicode text
//
// Started 09-April-2008

/**
 * This is an implementation of an encoding selector.
 * The goal is, given a Unicode string, to find the encodings
 * this string can be mapped to. To make processing faster,
 * a trie is built when you call ucnvsel_open() that
 * stores all encodings a code point can map to.
 */
29 #include "unicode/ucnvsel.h"
31 #if !UCONFIG_NO_CONVERSION
35 #include "unicode/uchar.h"
36 #include "unicode/uniset.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ustring.h"
39 #include "unicode/uchriter.h"
51 struct UConverterSelector
{
52 UTrie2
*trie
; // 16 bit trie containing offsets into pv
53 uint32_t* pv
; // table of bits!
55 char** encodings
; // which encodings did user ask to use?
56 int32_t encodingsCount
;
57 int32_t encodingStrLength
;
59 UBool ownPv
, ownEncodingStrings
;
62 static void generateSelectorData(UConverterSelector
* result
,
64 const USet
* excludedCodePoints
,
65 const UConverterUnicodeSet whichSet
,
67 if (U_FAILURE(*status
)) {
71 int32_t columns
= (result
->encodingsCount
+31)/32;
73 // set errorValue to all-ones
74 for (int32_t col
= 0; col
< columns
; col
++) {
75 upvec_setValue(upvec
, UPVEC_ERROR_VALUE_CP
, UPVEC_ERROR_VALUE_CP
,
76 col
, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status
);
79 for (int32_t i
= 0; i
< result
->encodingsCount
; ++i
) {
84 UConverter
* test_converter
= ucnv_open(result
->encodings
[i
], status
);
85 if (U_FAILURE(*status
)) {
88 USet
* unicode_point_set
;
89 unicode_point_set
= uset_open(1, 0); // empty set
91 ucnv_getUnicodeSet(test_converter
, unicode_point_set
,
93 if (U_FAILURE(*status
)) {
94 ucnv_close(test_converter
);
100 // now iterate over intervals on set i!
101 item_count
= uset_getItemCount(unicode_point_set
);
103 for (j
= 0; j
< item_count
; ++j
) {
106 UErrorCode smallStatus
= U_ZERO_ERROR
;
107 uset_getItem(unicode_point_set
, j
, &start_char
, &end_char
, NULL
, 0,
109 if (U_FAILURE(smallStatus
)) {
110 // this will be reached for the converters that fill the set with
111 // strings. Those should be ignored by our system
113 upvec_setValue(upvec
, start_char
, end_char
, column
, static_cast<uint32_t>(~0), mask
,
117 ucnv_close(test_converter
);
118 uset_close(unicode_point_set
);
119 if (U_FAILURE(*status
)) {
124 // handle excluded encodings! Simply set their values to all 1's in the upvec
125 if (excludedCodePoints
) {
126 int32_t item_count
= uset_getItemCount(excludedCodePoints
);
127 for (int32_t j
= 0; j
< item_count
; ++j
) {
131 uset_getItem(excludedCodePoints
, j
, &start_char
, &end_char
, NULL
, 0,
133 for (int32_t col
= 0; col
< columns
; col
++) {
134 upvec_setValue(upvec
, start_char
, end_char
, col
, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0),
140 // alright. Now, let's put things in the same exact form you'd get when you
141 // unserialize things.
142 result
->trie
= upvec_compactToUTrie2WithRowIndexes(upvec
, status
);
143 result
->pv
= upvec_cloneArray(upvec
, &result
->pvCount
, NULL
, status
);
144 result
->pvCount
*= columns
; // number of uint32_t = rows * columns
145 result
->ownPv
= TRUE
;
148 /* open a selector. If converterListSize is 0, build for all converters.
149 If excludedCodePoints is NULL, don't exclude any codepoints */
150 U_CAPI UConverterSelector
* U_EXPORT2
151 ucnvsel_open(const char* const* converterList
, int32_t converterListSize
,
152 const USet
* excludedCodePoints
,
153 const UConverterUnicodeSet whichSet
, UErrorCode
* status
) {
154 // check if already failed
155 if (U_FAILURE(*status
)) {
158 // ensure args make sense!
159 if (converterListSize
< 0 || (converterList
== NULL
&& converterListSize
!= 0)) {
160 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
164 // allocate a new converter
165 LocalUConverterSelectorPointer
newSelector(
166 (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
)));
167 if (newSelector
.isNull()) {
168 *status
= U_MEMORY_ALLOCATION_ERROR
;
171 uprv_memset(newSelector
.getAlias(), 0, sizeof(UConverterSelector
));
173 if (converterListSize
== 0) {
174 converterList
= NULL
;
175 converterListSize
= ucnv_countAvailable();
177 newSelector
->encodings
=
178 (char**)uprv_malloc(converterListSize
* sizeof(char*));
179 if (!newSelector
->encodings
) {
180 *status
= U_MEMORY_ALLOCATION_ERROR
;
183 newSelector
->encodings
[0] = NULL
; // now we can call ucnvsel_close()
185 // make a backup copy of the list of converters
186 int32_t totalSize
= 0;
188 for (i
= 0; i
< converterListSize
; i
++) {
190 (int32_t)uprv_strlen(converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
)) + 1;
192 // 4-align the totalSize to 4-align the size of the serialized form
193 int32_t encodingStrPadding
= totalSize
& 3;
194 if (encodingStrPadding
!= 0) {
195 encodingStrPadding
= 4 - encodingStrPadding
;
197 newSelector
->encodingStrLength
= totalSize
+= encodingStrPadding
;
198 char* allStrings
= (char*) uprv_malloc(totalSize
);
200 *status
= U_MEMORY_ALLOCATION_ERROR
;
204 for (i
= 0; i
< converterListSize
; i
++) {
205 newSelector
->encodings
[i
] = allStrings
;
206 uprv_strcpy(newSelector
->encodings
[i
],
207 converterList
!= NULL
? converterList
[i
] : ucnv_getAvailableName(i
));
208 allStrings
+= uprv_strlen(newSelector
->encodings
[i
]) + 1;
210 while (encodingStrPadding
> 0) {
212 --encodingStrPadding
;
215 newSelector
->ownEncodingStrings
= TRUE
;
216 newSelector
->encodingsCount
= converterListSize
;
217 UPropsVectors
*upvec
= upvec_open((converterListSize
+31)/32, status
);
218 generateSelectorData(newSelector
.getAlias(), upvec
, excludedCodePoints
, whichSet
, status
);
221 if (U_FAILURE(*status
)) {
225 return newSelector
.orphan();
228 /* close opened selector */
229 U_CAPI
void U_EXPORT2
230 ucnvsel_close(UConverterSelector
*sel
) {
234 if (sel
->ownEncodingStrings
) {
235 uprv_free(sel
->encodings
[0]);
237 uprv_free(sel
->encodings
);
241 utrie2_close(sel
->trie
);
242 uprv_free(sel
->swapped
);
246 static const UDataInfo dataInfo
= {
255 { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */
256 { 1, 0, 0, 0 }, /* formatVersion */
257 { 0, 0, 0, 0 } /* dataVersion */
// Positions inside the int32_t indexes[] array of the serialized form.
enum {
  UCNVSEL_INDEX_TRIE_SIZE,      // trie size in bytes
  UCNVSEL_INDEX_PV_COUNT,       // number of uint32_t in the bit vectors
  UCNVSEL_INDEX_NAMES_COUNT,    // number of encoding names
  UCNVSEL_INDEX_NAMES_LENGTH,   // number of encoding name bytes including padding
  UCNVSEL_INDEX_SIZE = 15,      // bytes following the DataHeader
  UCNVSEL_INDEX_COUNT = 16
};
270 * Serialized form of a UConverterSelector, formatVersion 1:
272 * The serialized form begins with a standard ICU DataHeader with a UDataInfo
273 * as the template above.
274 * This is followed by:
275 * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above
276 * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
277 * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors
278 * char* encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding
281 /* serialize a selector */
282 U_CAPI
int32_t U_EXPORT2
283 ucnvsel_serialize(const UConverterSelector
* sel
,
284 void* buffer
, int32_t bufferCapacity
, UErrorCode
* status
) {
285 // check if already failed
286 if (U_FAILURE(*status
)) {
289 // ensure args make sense!
290 uint8_t *p
= (uint8_t *)buffer
;
291 if (bufferCapacity
< 0 ||
292 (bufferCapacity
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
294 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
297 // add up the size of the serialized form
298 int32_t serializedTrieSize
= utrie2_serialize(sel
->trie
, NULL
, 0, status
);
299 if (*status
!= U_BUFFER_OVERFLOW_ERROR
&& U_FAILURE(*status
)) {
302 *status
= U_ZERO_ERROR
;
305 uprv_memset(&header
, 0, sizeof(header
));
306 header
.dataHeader
.headerSize
= (uint16_t)((sizeof(header
) + 15) & ~15);
307 header
.dataHeader
.magic1
= 0xda;
308 header
.dataHeader
.magic2
= 0x27;
309 uprv_memcpy(&header
.info
, &dataInfo
, sizeof(dataInfo
));
311 int32_t indexes
[UCNVSEL_INDEX_COUNT
] = {
315 sel
->encodingStrLength
319 header
.dataHeader
.headerSize
+
320 (int32_t)sizeof(indexes
) +
323 sel
->encodingStrLength
;
324 indexes
[UCNVSEL_INDEX_SIZE
] = totalSize
- header
.dataHeader
.headerSize
;
325 if (totalSize
> bufferCapacity
) {
326 *status
= U_BUFFER_OVERFLOW_ERROR
;
330 int32_t length
= header
.dataHeader
.headerSize
;
331 uprv_memcpy(p
, &header
, sizeof(header
));
332 uprv_memset(p
+ sizeof(header
), 0, length
- sizeof(header
));
335 length
= (int32_t)sizeof(indexes
);
336 uprv_memcpy(p
, indexes
, length
);
339 utrie2_serialize(sel
->trie
, p
, serializedTrieSize
, status
);
340 p
+= serializedTrieSize
;
342 length
= sel
->pvCount
* 4;
343 uprv_memcpy(p
, sel
->pv
, length
);
346 uprv_memcpy(p
, sel
->encodings
[0], sel
->encodingStrLength
);
347 p
+= sel
->encodingStrLength
;
353 * swap a selector into the desired Endianness and Asciiness of
354 * the system. Just as FYI, selectors are always saved in the format
355 * of the system that created them. They are only converted if used
356 * on another system. In other words, selectors created on different
357 * system can be different even if the params are identical (endianness
358 * and Asciiness differences only)
360 * @param ds pointer to data swapper containing swapping info
361 * @param inData pointer to incoming data
362 * @param length length of inData in bytes
363 * @param outData pointer to output data. Capacity should
364 * be at least equal to capacity of inData
365 * @param status an in/out ICU UErrorCode
366 * @return 0 on failure, number of bytes swapped on success
367 * number of bytes swapped can be smaller than length
370 ucnvsel_swap(const UDataSwapper
*ds
,
371 const void *inData
, int32_t length
,
372 void *outData
, UErrorCode
*status
) {
373 /* udata_swapDataHeader checks the arguments */
374 int32_t headerSize
= udata_swapDataHeader(ds
, inData
, length
, outData
, status
);
375 if(U_FAILURE(*status
)) {
379 /* check data format and format version */
380 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+ 4);
382 pInfo
->dataFormat
[0] == 0x43 && /* dataFormat="CSel" */
383 pInfo
->dataFormat
[1] == 0x53 &&
384 pInfo
->dataFormat
[2] == 0x65 &&
385 pInfo
->dataFormat
[3] == 0x6c
387 udata_printError(ds
, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
388 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
389 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3]);
390 *status
= U_INVALID_FORMAT_ERROR
;
393 if(pInfo
->formatVersion
[0] != 1) {
394 udata_printError(ds
, "ucnvsel_swap(): format version %02x is not supported\n",
395 pInfo
->formatVersion
[0]);
396 *status
= U_UNSUPPORTED_ERROR
;
401 length
-= headerSize
;
403 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
405 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
410 const uint8_t *inBytes
= (const uint8_t *)inData
+ headerSize
;
411 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
413 /* read the indexes */
414 const int32_t *inIndexes
= (const int32_t *)inBytes
;
417 for(i
= 0; i
< 16; ++i
) {
418 indexes
[i
] = udata_readInt32(ds
, inIndexes
[i
]);
421 /* get the total length of the data */
422 int32_t size
= indexes
[UCNVSEL_INDEX_SIZE
];
425 udata_printError(ds
, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
427 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
431 /* copy the data for inaccessible bytes */
432 if(inBytes
!= outBytes
) {
433 uprv_memcpy(outBytes
, inBytes
, size
);
436 int32_t offset
= 0, count
;
438 /* swap the int32_t indexes[] */
439 count
= UCNVSEL_INDEX_COUNT
*4;
440 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, status
);
443 /* swap the UTrie2 */
444 count
= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
445 utrie2_swap(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
448 /* swap the uint32_t pv[] */
449 count
= indexes
[UCNVSEL_INDEX_PV_COUNT
]*4;
450 ds
->swapArray32(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
453 /* swap the encoding names */
454 count
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
455 ds
->swapInvChars(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, status
);
458 U_ASSERT(offset
== size
);
461 return headerSize
+ size
;
464 /* unserialize a selector */
465 U_CAPI UConverterSelector
* U_EXPORT2
466 ucnvsel_openFromSerialized(const void* buffer
, int32_t length
, UErrorCode
* status
) {
467 // check if already failed
468 if (U_FAILURE(*status
)) {
471 // ensure args make sense!
472 const uint8_t *p
= (const uint8_t *)buffer
;
474 (length
> 0 && (p
== NULL
|| (U_POINTER_MASK_LSB(p
, 3) != 0)))
476 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
481 // not even enough space for a minimal header
482 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
485 const DataHeader
*pHeader
= (const DataHeader
*)p
;
487 pHeader
->dataHeader
.magic1
==0xda &&
488 pHeader
->dataHeader
.magic2
==0x27 &&
489 pHeader
->info
.dataFormat
[0] == 0x43 &&
490 pHeader
->info
.dataFormat
[1] == 0x53 &&
491 pHeader
->info
.dataFormat
[2] == 0x65 &&
492 pHeader
->info
.dataFormat
[3] == 0x6c
494 /* header not valid or dataFormat not recognized */
495 *status
= U_INVALID_FORMAT_ERROR
;
498 if (pHeader
->info
.formatVersion
[0] != 1) {
499 *status
= U_UNSUPPORTED_ERROR
;
502 uint8_t* swapped
= NULL
;
503 if (pHeader
->info
.isBigEndian
!= U_IS_BIG_ENDIAN
||
504 pHeader
->info
.charsetFamily
!= U_CHARSET_FAMILY
508 udata_openSwapperForInputData(p
, length
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, status
);
509 int32_t totalSize
= ucnvsel_swap(ds
, p
, -1, NULL
, status
);
510 if (U_FAILURE(*status
)) {
511 udata_closeSwapper(ds
);
514 if (length
< totalSize
) {
515 udata_closeSwapper(ds
);
516 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
519 swapped
= (uint8_t*)uprv_malloc(totalSize
);
520 if (swapped
== NULL
) {
521 udata_closeSwapper(ds
);
522 *status
= U_MEMORY_ALLOCATION_ERROR
;
525 ucnvsel_swap(ds
, p
, length
, swapped
, status
);
526 udata_closeSwapper(ds
);
527 if (U_FAILURE(*status
)) {
532 pHeader
= (const DataHeader
*)p
;
534 if (length
< (pHeader
->dataHeader
.headerSize
+ 16 * 4)) {
535 // not even enough space for the header and the indexes
537 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
540 p
+= pHeader
->dataHeader
.headerSize
;
541 length
-= pHeader
->dataHeader
.headerSize
;
543 const int32_t *indexes
= (const int32_t *)p
;
544 if (length
< indexes
[UCNVSEL_INDEX_SIZE
]) {
546 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
549 p
+= UCNVSEL_INDEX_COUNT
* 4;
550 // create and populate the selector object
551 UConverterSelector
* sel
= (UConverterSelector
*)uprv_malloc(sizeof(UConverterSelector
));
553 (char **)uprv_malloc(
554 indexes
[UCNVSEL_INDEX_NAMES_COUNT
] * sizeof(char *));
555 if (sel
== NULL
|| encodings
== NULL
) {
558 uprv_free(encodings
);
559 *status
= U_MEMORY_ALLOCATION_ERROR
;
562 uprv_memset(sel
, 0, sizeof(UConverterSelector
));
563 sel
->pvCount
= indexes
[UCNVSEL_INDEX_PV_COUNT
];
564 sel
->encodings
= encodings
;
565 sel
->encodingsCount
= indexes
[UCNVSEL_INDEX_NAMES_COUNT
];
566 sel
->encodingStrLength
= indexes
[UCNVSEL_INDEX_NAMES_LENGTH
];
567 sel
->swapped
= swapped
;
569 sel
->trie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
570 p
, indexes
[UCNVSEL_INDEX_TRIE_SIZE
], NULL
,
572 p
+= indexes
[UCNVSEL_INDEX_TRIE_SIZE
];
573 if (U_FAILURE(*status
)) {
578 sel
->pv
= (uint32_t *)p
;
579 p
+= sel
->pvCount
* 4;
582 for (int32_t i
= 0; i
< sel
->encodingsCount
; ++i
) {
583 sel
->encodings
[i
] = s
;
584 s
+= uprv_strlen(s
) + 1;
586 p
+= sel
->encodingStrLength
;
591 // a bunch of functions for the enumeration thingie! Nothing fancy here. Just
592 // iterate over the selected encodings
597 const UConverterSelector
* sel
;
602 static void U_CALLCONV
603 ucnvsel_close_selector_iterator(UEnumeration
*enumerator
) {
604 uprv_free(((Enumerator
*)(enumerator
->context
))->index
);
605 uprv_free(enumerator
->context
);
606 uprv_free(enumerator
);
610 static int32_t U_CALLCONV
611 ucnvsel_count_encodings(UEnumeration
*enumerator
, UErrorCode
*status
) {
612 // check if already failed
613 if (U_FAILURE(*status
)) {
616 return ((Enumerator
*)(enumerator
->context
))->length
;
620 static const char* U_CALLCONV
ucnvsel_next_encoding(UEnumeration
* enumerator
,
621 int32_t* resultLength
,
622 UErrorCode
* status
) {
623 // check if already failed
624 if (U_FAILURE(*status
)) {
628 int16_t cur
= ((Enumerator
*)(enumerator
->context
))->cur
;
629 const UConverterSelector
* sel
;
631 if (cur
>= ((Enumerator
*)(enumerator
->context
))->length
) {
634 sel
= ((Enumerator
*)(enumerator
->context
))->sel
;
635 result
= sel
->encodings
[((Enumerator
*)(enumerator
->context
))->index
[cur
] ];
636 ((Enumerator
*)(enumerator
->context
))->cur
++;
638 *resultLength
= (int32_t)uprv_strlen(result
);
643 static void U_CALLCONV
ucnvsel_reset_iterator(UEnumeration
* enumerator
,
644 UErrorCode
* status
) {
645 // check if already failed
646 if (U_FAILURE(*status
)) {
649 ((Enumerator
*)(enumerator
->context
))->cur
= 0;
655 static const UEnumeration defaultEncodings
= {
658 ucnvsel_close_selector_iterator
,
659 ucnvsel_count_encodings
,
661 ucnvsel_next_encoding
,
662 ucnvsel_reset_iterator
666 // internal fn to intersect two sets of masks
667 // returns whether the mask has reduced to all zeros
668 static UBool
intersectMasks(uint32_t* dest
, const uint32_t* source1
, int32_t len
) {
670 uint32_t oredDest
= 0;
671 for (i
= 0 ; i
< len
; ++i
) {
672 oredDest
|= (dest
[i
] &= source1
[i
]);
674 return oredDest
== 0;
// internal fn to count how many 1's are there in a mask
// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
// mask is read-only; len is the number of uint32_t words in it.
// The total is narrowed to int16_t on return.
static int16_t countOnes(const uint32_t* mask, int32_t len) {
  int32_t i, totalOnes = 0;
  for (i = 0 ; i < len ; ++i) {
    uint32_t ent = mask[i];
    // one iteration per set bit
    for (; ent; totalOnes++)
    {
      ent &= ent - 1; // clear the least significant bit set
    }
  }
  return (int16_t)totalOnes;
}
692 /* internal function! */
693 static UEnumeration
*selectForMask(const UConverterSelector
* sel
,
694 uint32_t *mask
, UErrorCode
*status
) {
695 // this is the context we will use. Store a table of indices to which
696 // encodings are legit.
697 struct Enumerator
* result
= (Enumerator
*)uprv_malloc(sizeof(Enumerator
));
698 if (result
== NULL
) {
700 *status
= U_MEMORY_ALLOCATION_ERROR
;
703 result
->index
= NULL
; // this will be allocated later!
704 result
->length
= result
->cur
= 0;
707 UEnumeration
*en
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
709 // TODO(markus): Combine Enumerator and UEnumeration into one struct.
712 *status
= U_MEMORY_ALLOCATION_ERROR
;
715 memcpy(en
, &defaultEncodings
, sizeof(UEnumeration
));
716 en
->context
= result
;
718 int32_t columns
= (sel
->encodingsCount
+31)/32;
719 int16_t numOnes
= countOnes(mask
, columns
);
720 // now, we know the exact space we need for index
722 result
->index
= (int16_t*) uprv_malloc(numOnes
* sizeof(int16_t));
726 for (j
= 0 ; j
< columns
; j
++) {
727 uint32_t v
= mask
[j
];
728 for (i
= 0 ; i
< 32 && k
< sel
->encodingsCount
; i
++, k
++) {
730 result
->index
[result
->length
++] = k
;
735 } //otherwise, index will remain NULL (and will never be touched by
736 //the enumerator code anyway)
741 /* check a string against the selector - UTF16 version */
742 U_CAPI UEnumeration
* U_EXPORT2
743 ucnvsel_selectForString(const UConverterSelector
* sel
,
744 const UChar
*s
, int32_t length
, UErrorCode
*status
) {
745 // check if already failed
746 if (U_FAILURE(*status
)) {
749 // ensure args make sense!
750 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
751 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
755 int32_t columns
= (sel
->encodingsCount
+31)/32;
756 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
758 *status
= U_MEMORY_ALLOCATION_ERROR
;
761 uprv_memset(mask
, ~0, columns
*4);
771 while (limit
== NULL
? *s
!= 0 : s
!= limit
) {
774 UTRIE2_U16_NEXT16(sel
->trie
, s
, limit
, c
, pvIndex
);
775 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
780 return selectForMask(sel
, mask
, status
);
783 /* check a string against the selector - UTF8 version */
784 U_CAPI UEnumeration
* U_EXPORT2
785 ucnvsel_selectForUTF8(const UConverterSelector
* sel
,
786 const char *s
, int32_t length
, UErrorCode
*status
) {
787 // check if already failed
788 if (U_FAILURE(*status
)) {
791 // ensure args make sense!
792 if (sel
== NULL
|| (s
== NULL
&& length
!= 0)) {
793 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
797 int32_t columns
= (sel
->encodingsCount
+31)/32;
798 uint32_t* mask
= (uint32_t*) uprv_malloc(columns
* 4);
800 *status
= U_MEMORY_ALLOCATION_ERROR
;
803 uprv_memset(mask
, ~0, columns
*4);
806 length
= (int32_t)uprv_strlen(s
);
810 const char *limit
= s
+ length
;
814 UTRIE2_U8_NEXT16(sel
->trie
, s
, limit
, pvIndex
);
815 if (intersectMasks(mask
, sel
->pv
+pvIndex
, columns
)) {
820 return selectForMask(sel
, mask
, status
);
823 #endif // !UCONFIG_NO_CONVERSION