]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
374ca955 A |
3 | /* |
4 | ****************************************************************************** | |
5 | * | |
2ca993e8 | 6 | * Copyright (C) 2003-2016, International Business Machines |
374ca955 A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ****************************************************************************** | |
4388f060 | 10 | * file name: ucnv_ext.cpp |
f3c0d7a5 | 11 | * encoding: UTF-8 |
374ca955 A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2003jun13 | |
16 | * created by: Markus W. Scherer | |
17 | * | |
18 | * Conversion extensions | |
19 | */ | |
20 | ||
21 | #include "unicode/utypes.h" | |
22 | ||
23 | #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
24 | ||
25 | #include "unicode/uset.h" | |
f3c0d7a5 | 26 | #include "unicode/ustring.h" |
374ca955 A |
27 | #include "ucnv_bld.h" |
28 | #include "ucnv_cnv.h" | |
29 | #include "ucnv_ext.h" | |
30 | #include "cmemory.h" | |
4388f060 | 31 | #include "uassert.h" |
374ca955 A |
32 | |
33 | /* to Unicode --------------------------------------------------------------- */ | |
34 | ||
35 | /* | |
36 | * @return lookup value for the byte, if found; else 0 | |
37 | */ | |
4388f060 | 38 | static inline uint32_t |
374ca955 A |
39 | ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { |
40 | uint32_t word0, word; | |
41 | int32_t i, start, limit; | |
42 | ||
43 | /* check the input byte against the lowest and highest section bytes */ | |
44 | start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); | |
45 | limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); | |
46 | if(byte<start || limit<byte) { | |
47 | return 0; /* the byte is out of range */ | |
48 | } | |
49 | ||
50 | if(length==((limit-start)+1)) { | |
51 | /* direct access on a linear array */ | |
52 | return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ | |
53 | } | |
54 | ||
55 | /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ | |
56 | word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); | |
57 | ||
58 | /* | |
59 | * Shift byte once instead of each section word and add 0xffffff. | |
60 | * We will compare the shifted/added byte (bbffffff) against | |
61 | * section words which have byte values in the same bit position. | |
62 | * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv | |
63 | * for all v=0..f | |
64 | * so we need not mask off the lower 24 bits of each section word. | |
65 | */ | |
66 | word=word0|UCNV_EXT_TO_U_VALUE_MASK; | |
67 | ||
68 | /* binary search */ | |
69 | start=0; | |
70 | limit=length; | |
71 | for(;;) { | |
72 | i=limit-start; | |
73 | if(i<=1) { | |
74 | break; /* done */ | |
75 | } | |
76 | /* start<limit-1 */ | |
77 | ||
78 | if(i<=4) { | |
79 | /* linear search for the last part */ | |
80 | if(word0<=toUSection[start]) { | |
81 | break; | |
82 | } | |
83 | if(++start<limit && word0<=toUSection[start]) { | |
84 | break; | |
85 | } | |
86 | if(++start<limit && word0<=toUSection[start]) { | |
87 | break; | |
88 | } | |
89 | /* always break at start==limit-1 */ | |
90 | ++start; | |
91 | break; | |
92 | } | |
93 | ||
94 | i=(start+limit)/2; | |
95 | if(word<toUSection[i]) { | |
96 | limit=i; | |
97 | } else { | |
98 | start=i; | |
99 | } | |
100 | } | |
101 | ||
102 | /* did we really find it? */ | |
103 | if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { | |
104 | return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ | |
105 | } else { | |
106 | return 0; /* not found */ | |
107 | } | |
108 | } | |
109 | ||
/*
 * TRUE if not an SI/SO stateful converter (sisoState<0),
 * or if the match length fits with the current converter state:
 * a match length of exactly 1 in state 0,
 * a match length other than 1 in any other state.
 *
 * Both arguments are fully parenthesized so that arbitrary expressions
 * (not only simple sums like i+j) can be passed safely.
 */
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
    ((sisoState)<0 || ((sisoState)==0) == ((match)==1))
116 | ||
117 | /* | |
118 | * this works like ucnv_extMatchFromU() except | |
119 | * - the first character is in pre | |
120 | * - no trie is used | |
121 | * - the returned matchLength is not offset by 2 | |
122 | */ | |
123 | static int32_t | |
124 | ucnv_extMatchToU(const int32_t *cx, int8_t sisoState, | |
125 | const char *pre, int32_t preLength, | |
126 | const char *src, int32_t srcLength, | |
127 | uint32_t *pMatchValue, | |
4388f060 | 128 | UBool /*useFallback*/, UBool flush) { |
374ca955 A |
129 | const uint32_t *toUTable, *toUSection; |
130 | ||
131 | uint32_t value, matchValue; | |
729e4ab9 | 132 | int32_t i, j, idx, length, matchLength; |
374ca955 A |
133 | uint8_t b; |
134 | ||
135 | if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { | |
136 | return 0; /* no extension data, no match */ | |
137 | } | |
138 | ||
139 | /* initialize */ | |
140 | toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); | |
729e4ab9 | 141 | idx=0; |
374ca955 A |
142 | |
143 | matchValue=0; | |
144 | i=j=matchLength=0; | |
145 | ||
146 | if(sisoState==0) { | |
147 | /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ | |
148 | if(preLength>1) { | |
149 | return 0; /* no match of a DBCS sequence in SBCS mode */ | |
150 | } else if(preLength==1) { | |
151 | srcLength=0; | |
152 | } else /* preLength==0 */ { | |
153 | if(srcLength>1) { | |
154 | srcLength=1; | |
155 | } | |
156 | } | |
157 | flush=TRUE; | |
158 | } | |
159 | ||
160 | /* we must not remember fallback matches when not using fallbacks */ | |
161 | ||
162 | /* match input units until there is a full match or the input is consumed */ | |
163 | for(;;) { | |
164 | /* go to the next section */ | |
729e4ab9 | 165 | toUSection=toUTable+idx; |
374ca955 A |
166 | |
167 | /* read first pair of the section */ | |
168 | value=*toUSection++; | |
169 | length=UCNV_EXT_TO_U_GET_BYTE(value); | |
170 | value=UCNV_EXT_TO_U_GET_VALUE(value); | |
171 | if( value!=0 && | |
172 | (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || | |
173 | TO_U_USE_FALLBACK(useFallback)) && | |
174 | UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) | |
175 | ) { | |
176 | /* remember longest match so far */ | |
177 | matchValue=value; | |
178 | matchLength=i+j; | |
179 | } | |
180 | ||
181 | /* match pre[] then src[] */ | |
182 | if(i<preLength) { | |
183 | b=(uint8_t)pre[i++]; | |
184 | } else if(j<srcLength) { | |
185 | b=(uint8_t)src[j++]; | |
186 | } else { | |
187 | /* all input consumed, partial match */ | |
188 | if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { | |
189 | /* | |
190 | * end of the entire input stream, stop with the longest match so far | |
191 | * or: partial match must not be longer than UCNV_EXT_MAX_BYTES | |
192 | * because it must fit into state buffers | |
193 | */ | |
194 | break; | |
195 | } else { | |
196 | /* continue with more input next time */ | |
197 | return -length; | |
198 | } | |
199 | } | |
200 | ||
201 | /* search for the current UChar */ | |
202 | value=ucnv_extFindToU(toUSection, length, b); | |
203 | if(value==0) { | |
204 | /* no match here, stop with the longest match so far */ | |
205 | break; | |
206 | } else { | |
207 | if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { | |
208 | /* partial match, continue */ | |
729e4ab9 | 209 | idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); |
374ca955 A |
210 | } else { |
211 | if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || | |
212 | TO_U_USE_FALLBACK(useFallback)) && | |
213 | UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) | |
214 | ) { | |
215 | /* full match, stop with result */ | |
216 | matchValue=value; | |
217 | matchLength=i+j; | |
218 | } else { | |
219 | /* full match on fallback not taken, stop with the longest match so far */ | |
220 | } | |
221 | break; | |
222 | } | |
223 | } | |
224 | } | |
225 | ||
226 | if(matchLength==0) { | |
227 | /* no match at all */ | |
228 | return 0; | |
229 | } | |
230 | ||
231 | /* return result */ | |
232 | *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); | |
233 | return matchLength; | |
234 | } | |
235 | ||
4388f060 | 236 | static inline void |
374ca955 A |
237 | ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, |
238 | uint32_t value, | |
239 | UChar **target, const UChar *targetLimit, | |
240 | int32_t **offsets, int32_t srcIndex, | |
241 | UErrorCode *pErrorCode) { | |
242 | /* output the result */ | |
243 | if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { | |
244 | /* output a single code point */ | |
245 | ucnv_toUWriteCodePoint( | |
246 | cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), | |
247 | target, targetLimit, | |
248 | offsets, srcIndex, | |
249 | pErrorCode); | |
250 | } else { | |
251 | /* output a string - with correct data we have resultLength>0 */ | |
252 | ucnv_toUWriteUChars( | |
253 | cnv, | |
254 | UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ | |
255 | UCNV_EXT_TO_U_GET_INDEX(value), | |
256 | UCNV_EXT_TO_U_GET_LENGTH(value), | |
257 | target, targetLimit, | |
258 | offsets, srcIndex, | |
259 | pErrorCode); | |
260 | } | |
261 | } | |
262 | ||
/*
 * Determine the SI/SO toU state of a converter:
 * - MBCS_OUTPUT_2_SISO:      the converter mode (state 0 is for SBCS, 1 for DBCS)
 * - MBCS_OUTPUT_DBCS_ONLY:   always 1
 * - any other output type:   -1, the converter is not SI/SO stateful
 *
 * Note: For SI/SO stateful converters getting here,
 * cnv->mode==0 is equivalent to firstLength==1.
 */
#define UCNV_SISO_STATE(cnv) \
    ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? \
        (int8_t)(cnv)->mode : \
        ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1))
274 | ||
275 | /* | |
276 | * target<targetLimit; set error code for overflow | |
277 | */ | |
278 | U_CFUNC UBool | |
279 | ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, | |
280 | int32_t firstLength, | |
281 | const char **src, const char *srcLimit, | |
282 | UChar **target, const UChar *targetLimit, | |
283 | int32_t **offsets, int32_t srcIndex, | |
284 | UBool flush, | |
285 | UErrorCode *pErrorCode) { | |
729e4ab9 | 286 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
287 | int32_t match; |
288 | ||
289 | /* try to match */ | |
290 | match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), | |
291 | (const char *)cnv->toUBytes, firstLength, | |
292 | *src, (int32_t)(srcLimit-*src), | |
293 | &value, | |
294 | cnv->useFallback, flush); | |
295 | if(match>0) { | |
296 | /* advance src pointer for the consumed input */ | |
297 | *src+=match-firstLength; | |
298 | ||
299 | /* write result to target */ | |
300 | ucnv_extWriteToU(cnv, cx, | |
301 | value, | |
302 | target, targetLimit, | |
303 | offsets, srcIndex, | |
304 | pErrorCode); | |
305 | return TRUE; | |
306 | } else if(match<0) { | |
307 | /* save state for partial match */ | |
308 | const char *s; | |
309 | int32_t j; | |
310 | ||
311 | /* copy the first code point */ | |
312 | s=(const char *)cnv->toUBytes; | |
313 | cnv->preToUFirstLength=(int8_t)firstLength; | |
314 | for(j=0; j<firstLength; ++j) { | |
315 | cnv->preToU[j]=*s++; | |
316 | } | |
317 | ||
318 | /* now copy the newly consumed input */ | |
319 | s=*src; | |
320 | match=-match; | |
321 | for(; j<match; ++j) { | |
322 | cnv->preToU[j]=*s++; | |
323 | } | |
324 | *src=s; /* same as *src=srcLimit; because we reached the end of input */ | |
325 | cnv->preToULength=(int8_t)match; | |
326 | return TRUE; | |
327 | } else /* match==0 no match */ { | |
328 | return FALSE; | |
329 | } | |
330 | } | |
331 | ||
332 | U_CFUNC UChar32 | |
333 | ucnv_extSimpleMatchToU(const int32_t *cx, | |
334 | const char *source, int32_t length, | |
335 | UBool useFallback) { | |
729e4ab9 | 336 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
337 | int32_t match; |
338 | ||
339 | if(length<=0) { | |
340 | return 0xffff; | |
341 | } | |
342 | ||
343 | /* try to match */ | |
344 | match=ucnv_extMatchToU(cx, -1, | |
345 | source, length, | |
346 | NULL, 0, | |
347 | &value, | |
348 | useFallback, TRUE); | |
349 | if(match==length) { | |
350 | /* write result for simple, single-character conversion */ | |
351 | if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { | |
352 | return UCNV_EXT_TO_U_GET_CODE_POINT(value); | |
353 | } | |
354 | } | |
355 | ||
356 | /* | |
357 | * return no match because | |
358 | * - match>0 && value points to string: simple conversion cannot handle multiple code points | |
359 | * - match>0 && match!=length: not all input consumed, forbidden for this function | |
360 | * - match==0: no match found in the first place | |
361 | * - match<0: partial match, not supported for simple conversion (and flush==TRUE) | |
362 | */ | |
363 | return 0xfffe; | |
364 | } | |
365 | ||
366 | /* | |
367 | * continue partial match with new input | |
368 | * never called for simple, single-character conversion | |
369 | */ | |
370 | U_CFUNC void | |
371 | ucnv_extContinueMatchToU(UConverter *cnv, | |
372 | UConverterToUnicodeArgs *pArgs, int32_t srcIndex, | |
373 | UErrorCode *pErrorCode) { | |
729e4ab9 | 374 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
375 | int32_t match, length; |
376 | ||
377 | match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), | |
378 | cnv->preToU, cnv->preToULength, | |
379 | pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), | |
380 | &value, | |
381 | cnv->useFallback, pArgs->flush); | |
382 | if(match>0) { | |
383 | if(match>=cnv->preToULength) { | |
384 | /* advance src pointer for the consumed input */ | |
385 | pArgs->source+=match-cnv->preToULength; | |
386 | cnv->preToULength=0; | |
387 | } else { | |
388 | /* the match did not use all of preToU[] - keep the rest for replay */ | |
389 | length=cnv->preToULength-match; | |
390 | uprv_memmove(cnv->preToU, cnv->preToU+match, length); | |
391 | cnv->preToULength=(int8_t)-length; | |
392 | } | |
393 | ||
394 | /* write result */ | |
395 | ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, | |
396 | value, | |
397 | &pArgs->target, pArgs->targetLimit, | |
398 | &pArgs->offsets, srcIndex, | |
399 | pErrorCode); | |
400 | } else if(match<0) { | |
401 | /* save state for partial match */ | |
402 | const char *s; | |
403 | int32_t j; | |
404 | ||
405 | /* just _append_ the newly consumed input to preToU[] */ | |
406 | s=pArgs->source; | |
407 | match=-match; | |
408 | for(j=cnv->preToULength; j<match; ++j) { | |
409 | cnv->preToU[j]=*s++; | |
410 | } | |
411 | pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ | |
412 | cnv->preToULength=(int8_t)match; | |
413 | } else /* match==0 */ { | |
414 | /* | |
415 | * no match | |
416 | * | |
417 | * We need to split the previous input into two parts: | |
418 | * | |
419 | * 1. The first codepage character is unmappable - that's how we got into | |
420 | * trying the extension data in the first place. | |
421 | * We need to move it from the preToU buffer | |
422 | * to the error buffer, set an error code, | |
423 | * and prepare the rest of the previous input for 2. | |
424 | * | |
425 | * 2. The rest of the previous input must be converted once we | |
426 | * come back from the callback for the first character. | |
427 | * At that time, we have to try again from scratch to convert | |
428 | * these input characters. | |
429 | * The replay will be handled by the ucnv.c conversion code. | |
430 | */ | |
431 | ||
432 | /* move the first codepage character to the error field */ | |
433 | uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); | |
434 | cnv->toULength=cnv->preToUFirstLength; | |
435 | ||
436 | /* move the rest up inside the buffer */ | |
437 | length=cnv->preToULength-cnv->preToUFirstLength; | |
438 | if(length>0) { | |
439 | uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); | |
440 | } | |
441 | ||
442 | /* mark preToU for replay */ | |
443 | cnv->preToULength=(int8_t)-length; | |
444 | ||
445 | /* set the error code for unassigned */ | |
446 | *pErrorCode=U_INVALID_CHAR_FOUND; | |
447 | } | |
448 | } | |
449 | ||
450 | /* from Unicode ------------------------------------------------------------- */ | |
451 | ||
51004dcb A |
452 | // Use roundtrips, "good one-way" mappings, and some normal fallbacks. |
453 | static inline UBool | |
454 | extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { | |
455 | return | |
456 | ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || | |
457 | FROM_U_USE_FALLBACK(useFallback, firstCP)) && | |
458 | (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; | |
459 | } | |
460 | ||
374ca955 A |
461 | /* |
462 | * @return index of the UChar, if found; else <0 | |
463 | */ | |
4388f060 | 464 | static inline int32_t |
374ca955 A |
465 | ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { |
466 | int32_t i, start, limit; | |
467 | ||
468 | /* binary search */ | |
469 | start=0; | |
470 | limit=length; | |
471 | for(;;) { | |
472 | i=limit-start; | |
473 | if(i<=1) { | |
474 | break; /* done */ | |
475 | } | |
476 | /* start<limit-1 */ | |
477 | ||
478 | if(i<=4) { | |
479 | /* linear search for the last part */ | |
480 | if(u<=fromUSection[start]) { | |
481 | break; | |
482 | } | |
483 | if(++start<limit && u<=fromUSection[start]) { | |
484 | break; | |
485 | } | |
486 | if(++start<limit && u<=fromUSection[start]) { | |
487 | break; | |
488 | } | |
489 | /* always break at start==limit-1 */ | |
490 | ++start; | |
491 | break; | |
492 | } | |
493 | ||
494 | i=(start+limit)/2; | |
495 | if(u<fromUSection[i]) { | |
496 | limit=i; | |
497 | } else { | |
498 | start=i; | |
499 | } | |
500 | } | |
501 | ||
502 | /* did we really find it? */ | |
503 | if(start<limit && u==fromUSection[start]) { | |
504 | return start; | |
505 | } else { | |
506 | return -1; /* not found */ | |
507 | } | |
508 | } | |
509 | ||
510 | /* | |
511 | * @param cx pointer to extension data; if NULL, returns 0 | |
512 | * @param firstCP the first code point before all the other UChars | |
513 | * @param pre UChars that must match; !initialMatch: partial match with them | |
514 | * @param preLength length of pre, >=0 | |
515 | * @param src UChars that can be used to complete a match | |
516 | * @param srcLength length of src, >=0 | |
517 | * @param pMatchValue [out] output result value for the match from the data structure | |
518 | * @param useFallback "use fallback" flag, usually from cnv->useFallback | |
519 | * @param flush TRUE if the end of the input stream is reached | |
520 | * @return >1: matched, return value=total match length (number of input units matched) | |
521 | * 1: matched, no mapping but request for <subchar1> | |
522 | * (only for the first code point) | |
523 | * 0: no match | |
524 | * <0: partial match, return value=negative total match length | |
525 | * (partial matches are never returned for flush==TRUE) | |
526 | * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) | |
527 | * the matchLength is 2 if only firstCP matched, and >2 if firstCP and | |
528 | * further code units matched | |
529 | */ | |
530 | static int32_t | |
531 | ucnv_extMatchFromU(const int32_t *cx, | |
532 | UChar32 firstCP, | |
533 | const UChar *pre, int32_t preLength, | |
534 | const UChar *src, int32_t srcLength, | |
535 | uint32_t *pMatchValue, | |
536 | UBool useFallback, UBool flush) { | |
537 | const uint16_t *stage12, *stage3; | |
538 | const uint32_t *stage3b; | |
539 | ||
540 | const UChar *fromUTableUChars, *fromUSectionUChars; | |
541 | const uint32_t *fromUTableValues, *fromUSectionValues; | |
542 | ||
543 | uint32_t value, matchValue; | |
729e4ab9 | 544 | int32_t i, j, idx, length, matchLength; |
374ca955 A |
545 | UChar c; |
546 | ||
547 | if(cx==NULL) { | |
548 | return 0; /* no extension data, no match */ | |
549 | } | |
550 | ||
551 | /* trie lookup of firstCP */ | |
729e4ab9 A |
552 | idx=firstCP>>10; /* stage 1 index */ |
553 | if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { | |
374ca955 A |
554 | return 0; /* the first code point is outside the trie */ |
555 | } | |
556 | ||
557 | stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); | |
558 | stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); | |
729e4ab9 | 559 | idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); |
374ca955 A |
560 | |
561 | stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); | |
729e4ab9 | 562 | value=stage3b[idx]; |
374ca955 A |
563 | if(value==0) { |
564 | return 0; | |
565 | } | |
566 | ||
46f4442e A |
567 | /* |
568 | * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: | |
569 | * Do not interpret values with reserved bits used, for forward compatibility, | |
570 | * and do not even remember intermediate results with reserved bits used. | |
571 | */ | |
572 | ||
374ca955 A |
573 | if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { |
574 | /* partial match, enter the loop below */ | |
729e4ab9 | 575 | idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
374ca955 A |
576 | |
577 | /* initialize */ | |
578 | fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); | |
579 | fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); | |
580 | ||
581 | matchValue=0; | |
582 | i=j=matchLength=0; | |
583 | ||
584 | /* we must not remember fallback matches when not using fallbacks */ | |
585 | ||
586 | /* match input units until there is a full match or the input is consumed */ | |
587 | for(;;) { | |
588 | /* go to the next section */ | |
729e4ab9 A |
589 | fromUSectionUChars=fromUTableUChars+idx; |
590 | fromUSectionValues=fromUTableValues+idx; | |
374ca955 A |
591 | |
592 | /* read first pair of the section */ | |
593 | length=*fromUSectionUChars++; | |
594 | value=*fromUSectionValues++; | |
51004dcb | 595 | if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { |
374ca955 A |
596 | /* remember longest match so far */ |
597 | matchValue=value; | |
598 | matchLength=2+i+j; | |
599 | } | |
600 | ||
601 | /* match pre[] then src[] */ | |
602 | if(i<preLength) { | |
603 | c=pre[i++]; | |
604 | } else if(j<srcLength) { | |
605 | c=src[j++]; | |
606 | } else { | |
607 | /* all input consumed, partial match */ | |
608 | if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { | |
609 | /* | |
610 | * end of the entire input stream, stop with the longest match so far | |
611 | * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS | |
612 | * because it must fit into state buffers | |
613 | */ | |
614 | break; | |
615 | } else { | |
616 | /* continue with more input next time */ | |
617 | return -(2+length); | |
618 | } | |
619 | } | |
620 | ||
621 | /* search for the current UChar */ | |
729e4ab9 A |
622 | idx=ucnv_extFindFromU(fromUSectionUChars, length, c); |
623 | if(idx<0) { | |
374ca955 A |
624 | /* no match here, stop with the longest match so far */ |
625 | break; | |
626 | } else { | |
729e4ab9 | 627 | value=fromUSectionValues[idx]; |
374ca955 A |
628 | if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
629 | /* partial match, continue */ | |
729e4ab9 | 630 | idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
374ca955 | 631 | } else { |
51004dcb | 632 | if(extFromUUseMapping(useFallback, value, firstCP)) { |
374ca955 A |
633 | /* full match, stop with result */ |
634 | matchValue=value; | |
635 | matchLength=2+i+j; | |
636 | } else { | |
637 | /* full match on fallback not taken, stop with the longest match so far */ | |
638 | } | |
639 | break; | |
640 | } | |
641 | } | |
642 | } | |
643 | ||
644 | if(matchLength==0) { | |
645 | /* no match at all */ | |
646 | return 0; | |
647 | } | |
648 | } else /* result from firstCP trie lookup */ { | |
51004dcb | 649 | if(extFromUUseMapping(useFallback, value, firstCP)) { |
374ca955 A |
650 | /* full match, stop with result */ |
651 | matchValue=value; | |
652 | matchLength=2; | |
653 | } else { | |
654 | /* fallback not taken */ | |
655 | return 0; | |
656 | } | |
657 | } | |
658 | ||
374ca955 A |
659 | /* return result */ |
660 | if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { | |
661 | return 1; /* assert matchLength==2 */ | |
662 | } | |
663 | ||
46f4442e | 664 | *pMatchValue=matchValue; |
374ca955 A |
665 | return matchLength; |
666 | } | |
667 | ||
46f4442e A |
668 | /* |
669 | * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits | |
670 | */ | |
4388f060 | 671 | static inline void |
374ca955 A |
672 | ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, |
673 | uint32_t value, | |
674 | char **target, const char *targetLimit, | |
675 | int32_t **offsets, int32_t srcIndex, | |
676 | UErrorCode *pErrorCode) { | |
677 | uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; | |
678 | const uint8_t *result; | |
679 | int32_t length, prevLength; | |
680 | ||
681 | length=UCNV_EXT_FROM_U_GET_LENGTH(value); | |
682 | value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); | |
683 | ||
684 | /* output the result */ | |
685 | if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { | |
686 | /* | |
687 | * Generate a byte array and then write it below. | |
688 | * This is not the fastest possible way, but it should be ok for | |
689 | * extension mappings, and it is much simpler. | |
690 | * Offset and overflow handling are only done once this way. | |
691 | */ | |
692 | uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ | |
693 | switch(length) { | |
694 | case 3: | |
695 | *p++=(uint8_t)(value>>16); | |
2ca993e8 A |
696 | U_FALLTHROUGH; |
697 | case 2: | |
374ca955 | 698 | *p++=(uint8_t)(value>>8); |
2ca993e8 A |
699 | U_FALLTHROUGH; |
700 | case 1: | |
374ca955 | 701 | *p++=(uint8_t)value; |
2ca993e8 | 702 | U_FALLTHROUGH; |
374ca955 A |
703 | default: |
704 | break; /* will never occur */ | |
705 | } | |
706 | result=buffer+1; | |
707 | } else { | |
708 | result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; | |
709 | } | |
710 | ||
711 | /* with correct data we have length>0 */ | |
712 | ||
713 | if((prevLength=cnv->fromUnicodeStatus)!=0) { | |
714 | /* handle SI/SO stateful output */ | |
715 | uint8_t shiftByte; | |
716 | ||
717 | if(prevLength>1 && length==1) { | |
718 | /* change from double-byte mode to single-byte */ | |
719 | shiftByte=(uint8_t)UCNV_SI; | |
720 | cnv->fromUnicodeStatus=1; | |
721 | } else if(prevLength==1 && length>1) { | |
722 | /* change from single-byte mode to double-byte */ | |
723 | shiftByte=(uint8_t)UCNV_SO; | |
724 | cnv->fromUnicodeStatus=2; | |
725 | } else { | |
726 | shiftByte=0; | |
727 | } | |
728 | ||
729 | if(shiftByte!=0) { | |
730 | /* prepend the shift byte to the result bytes */ | |
731 | buffer[0]=shiftByte; | |
732 | if(result!=buffer+1) { | |
733 | uprv_memcpy(buffer+1, result, length); | |
734 | } | |
735 | result=buffer; | |
736 | ++length; | |
737 | } | |
738 | } | |
739 | ||
740 | ucnv_fromUWriteBytes(cnv, (const char *)result, length, | |
741 | target, targetLimit, | |
742 | offsets, srcIndex, | |
743 | pErrorCode); | |
744 | } | |
745 | ||
746 | /* | |
747 | * target<targetLimit; set error code for overflow | |
748 | */ | |
749 | U_CFUNC UBool | |
750 | ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, | |
751 | UChar32 cp, | |
752 | const UChar **src, const UChar *srcLimit, | |
753 | char **target, const char *targetLimit, | |
754 | int32_t **offsets, int32_t srcIndex, | |
755 | UBool flush, | |
756 | UErrorCode *pErrorCode) { | |
729e4ab9 | 757 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
758 | int32_t match; |
759 | ||
760 | /* try to match */ | |
761 | match=ucnv_extMatchFromU(cx, cp, | |
762 | NULL, 0, | |
763 | *src, (int32_t)(srcLimit-*src), | |
764 | &value, | |
765 | cnv->useFallback, flush); | |
766 | ||
767 | /* reject a match if the result is a single byte for DBCS-only */ | |
768 | if( match>=2 && | |
769 | !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && | |
770 | cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) | |
771 | ) { | |
772 | /* advance src pointer for the consumed input */ | |
773 | *src+=match-2; /* remove 2 for the initial code point */ | |
774 | ||
775 | /* write result to target */ | |
776 | ucnv_extWriteFromU(cnv, cx, | |
777 | value, | |
778 | target, targetLimit, | |
779 | offsets, srcIndex, | |
780 | pErrorCode); | |
781 | return TRUE; | |
782 | } else if(match<0) { | |
783 | /* save state for partial match */ | |
784 | const UChar *s; | |
785 | int32_t j; | |
786 | ||
787 | /* copy the first code point */ | |
788 | cnv->preFromUFirstCP=cp; | |
789 | ||
790 | /* now copy the newly consumed input */ | |
791 | s=*src; | |
792 | match=-match-2; /* remove 2 for the initial code point */ | |
793 | for(j=0; j<match; ++j) { | |
794 | cnv->preFromU[j]=*s++; | |
795 | } | |
796 | *src=s; /* same as *src=srcLimit; because we reached the end of input */ | |
797 | cnv->preFromULength=(int8_t)match; | |
798 | return TRUE; | |
799 | } else if(match==1) { | |
800 | /* matched, no mapping but request for <subchar1> */ | |
801 | cnv->useSubChar1=TRUE; | |
802 | return FALSE; | |
803 | } else /* match==0 no match */ { | |
804 | return FALSE; | |
805 | } | |
806 | } | |
807 | ||
46f4442e A |
808 | /* |
809 | * Used by ISO 2022 implementation. | |
810 | * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping | |
811 | */ | |
374ca955 A |
812 | U_CFUNC int32_t |
813 | ucnv_extSimpleMatchFromU(const int32_t *cx, | |
814 | UChar32 cp, uint32_t *pValue, | |
815 | UBool useFallback) { | |
816 | uint32_t value; | |
817 | int32_t match; | |
818 | ||
819 | /* try to match */ | |
820 | match=ucnv_extMatchFromU(cx, | |
821 | cp, | |
822 | NULL, 0, | |
823 | NULL, 0, | |
824 | &value, | |
825 | useFallback, TRUE); | |
826 | if(match>=2) { | |
827 | /* write result for simple, single-character conversion */ | |
828 | int32_t length; | |
46f4442e A |
829 | int isRoundtrip; |
830 | ||
831 | isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); | |
374ca955 A |
832 | length=UCNV_EXT_FROM_U_GET_LENGTH(value); |
833 | value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); | |
834 | ||
835 | if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { | |
836 | *pValue=value; | |
46f4442e | 837 | return isRoundtrip ? length : -length; |
374ca955 A |
838 | #if 0 /* not currently used */ |
839 | } else if(length==4) { | |
840 | /* de-serialize a 4-byte result */ | |
841 | const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; | |
842 | *pValue= | |
843 | ((uint32_t)result[0]<<24)| | |
844 | ((uint32_t)result[1]<<16)| | |
845 | ((uint32_t)result[2]<<8)| | |
846 | result[3]; | |
46f4442e | 847 | return isRoundtrip ? 4 : -4; |
374ca955 A |
848 | #endif |
849 | } | |
850 | } | |
851 | ||
852 | /* | |
853 | * return no match because | |
854 | * - match>1 && resultLength>4: result too long for simple conversion | |
855 | * - match==1: no match found, <subchar1> preferred | |
856 | * - match==0: no match found in the first place | |
857 | * - match<0: partial match, not supported for simple conversion (and flush==TRUE) | |
858 | */ | |
859 | return 0; | |
860 | } | |
861 | ||
862 | /* | |
863 | * continue partial match with new input, requires cnv->preFromUFirstCP>=0 | |
864 | * never called for simple, single-character conversion | |
865 | */ | |
866 | U_CFUNC void | |
867 | ucnv_extContinueMatchFromU(UConverter *cnv, | |
868 | UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, | |
869 | UErrorCode *pErrorCode) { | |
729e4ab9 | 870 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
871 | int32_t match; |
872 | ||
873 | match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, | |
874 | cnv->preFromUFirstCP, | |
875 | cnv->preFromU, cnv->preFromULength, | |
876 | pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), | |
877 | &value, | |
878 | cnv->useFallback, pArgs->flush); | |
879 | if(match>=2) { | |
880 | match-=2; /* remove 2 for the initial code point */ | |
881 | ||
882 | if(match>=cnv->preFromULength) { | |
883 | /* advance src pointer for the consumed input */ | |
884 | pArgs->source+=match-cnv->preFromULength; | |
885 | cnv->preFromULength=0; | |
886 | } else { | |
887 | /* the match did not use all of preFromU[] - keep the rest for replay */ | |
888 | int32_t length=cnv->preFromULength-match; | |
a62d09fc | 889 | u_memmove(cnv->preFromU, cnv->preFromU+match, length); |
374ca955 A |
890 | cnv->preFromULength=(int8_t)-length; |
891 | } | |
892 | ||
893 | /* finish the partial match */ | |
894 | cnv->preFromUFirstCP=U_SENTINEL; | |
895 | ||
896 | /* write result */ | |
897 | ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, | |
898 | value, | |
899 | &pArgs->target, pArgs->targetLimit, | |
900 | &pArgs->offsets, srcIndex, | |
901 | pErrorCode); | |
902 | } else if(match<0) { | |
903 | /* save state for partial match */ | |
904 | const UChar *s; | |
905 | int32_t j; | |
906 | ||
907 | /* just _append_ the newly consumed input to preFromU[] */ | |
908 | s=pArgs->source; | |
909 | match=-match-2; /* remove 2 for the initial code point */ | |
910 | for(j=cnv->preFromULength; j<match; ++j) { | |
4388f060 | 911 | U_ASSERT(j>=0); |
374ca955 A |
912 | cnv->preFromU[j]=*s++; |
913 | } | |
914 | pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ | |
915 | cnv->preFromULength=(int8_t)match; | |
916 | } else /* match==0 or 1 */ { | |
917 | /* | |
918 | * no match | |
919 | * | |
920 | * We need to split the previous input into two parts: | |
921 | * | |
922 | * 1. The first code point is unmappable - that's how we got into | |
923 | * trying the extension data in the first place. | |
924 | * We need to move it from the preFromU buffer | |
925 | * to the error buffer, set an error code, | |
926 | * and prepare the rest of the previous input for 2. | |
927 | * | |
928 | * 2. The rest of the previous input must be converted once we | |
929 | * come back from the callback for the first code point. | |
930 | * At that time, we have to try again from scratch to convert | |
931 | * these input characters. | |
932 | * The replay will be handled by the ucnv.c conversion code. | |
933 | */ | |
934 | ||
935 | if(match==1) { | |
936 | /* matched, no mapping but request for <subchar1> */ | |
937 | cnv->useSubChar1=TRUE; | |
938 | } | |
939 | ||
940 | /* move the first code point to the error field */ | |
941 | cnv->fromUChar32=cnv->preFromUFirstCP; | |
942 | cnv->preFromUFirstCP=U_SENTINEL; | |
943 | ||
944 | /* mark preFromU for replay */ | |
945 | cnv->preFromULength=-cnv->preFromULength; | |
946 | ||
947 | /* set the error code for unassigned */ | |
948 | *pErrorCode=U_INVALID_CHAR_FOUND; | |
949 | } | |
950 | } | |
951 | ||
51004dcb A |
952 | static UBool |
953 | extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { | |
954 | if(which==UCNV_ROUNDTRIP_SET) { | |
955 | // Add only code points for which the roundtrip flag is set. | |
956 | // Do not add any fallbacks, even if ucnv_fromUnicode() would use them | |
957 | // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). | |
958 | // | |
959 | // By analogy, also do not add "good one-way" mappings. | |
960 | // | |
961 | // Do not add entries with reserved bits set. | |
962 | if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= | |
963 | UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { | |
964 | return FALSE; | |
965 | } | |
966 | } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { | |
967 | // Do not add entries with reserved bits set. | |
968 | if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { | |
969 | return FALSE; | |
970 | } | |
971 | } | |
972 | // Do not add <subchar1> entries or other (future?) pseudo-entries | |
973 | // with an output length of 0. | |
974 | return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; | |
975 | } | |
976 | ||
374ca955 A |
977 | static void |
978 | ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, | |
979 | const int32_t *cx, | |
73c04bcf | 980 | const USetAdder *sa, |
51004dcb | 981 | UConverterUnicodeSet which, |
374ca955 | 982 | int32_t minLength, |
51004dcb | 983 | UChar32 firstCP, |
374ca955 A |
984 | UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, |
985 | int32_t sectionIndex, | |
986 | UErrorCode *pErrorCode) { | |
987 | const UChar *fromUSectionUChars; | |
988 | const uint32_t *fromUSectionValues; | |
989 | ||
990 | uint32_t value; | |
991 | int32_t i, count; | |
992 | ||
993 | fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; | |
994 | fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; | |
995 | ||
996 | /* read first pair of the section */ | |
997 | count=*fromUSectionUChars++; | |
998 | value=*fromUSectionValues++; | |
999 | ||
51004dcb A |
1000 | if(extSetUseMapping(which, minLength, value)) { |
1001 | if(length==U16_LENGTH(firstCP)) { | |
374ca955 | 1002 | /* add the initial code point */ |
51004dcb | 1003 | sa->add(sa->set, firstCP); |
374ca955 A |
1004 | } else { |
1005 | /* add the string so far */ | |
1006 | sa->addString(sa->set, s, length); | |
1007 | } | |
1008 | } | |
1009 | ||
1010 | for(i=0; i<count; ++i) { | |
1011 | /* append this code unit and recurse or add the string */ | |
1012 | s[length]=fromUSectionUChars[i]; | |
1013 | value=fromUSectionValues[i]; | |
1014 | ||
1015 | if(value==0) { | |
1016 | /* no mapping, do nothing */ | |
1017 | } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { | |
1018 | ucnv_extGetUnicodeSetString( | |
51004dcb A |
1019 | sharedData, cx, sa, which, minLength, |
1020 | firstCP, s, length+1, | |
374ca955 A |
1021 | (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), |
1022 | pErrorCode); | |
51004dcb | 1023 | } else if(extSetUseMapping(which, minLength, value)) { |
374ca955 A |
1024 | sa->addString(sa->set, s, length+1); |
1025 | } | |
1026 | } | |
1027 | } | |
1028 | ||
1029 | U_CFUNC void | |
1030 | ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, | |
73c04bcf | 1031 | const USetAdder *sa, |
374ca955 | 1032 | UConverterUnicodeSet which, |
46f4442e | 1033 | UConverterSetFilter filter, |
374ca955 A |
1034 | UErrorCode *pErrorCode) { |
1035 | const int32_t *cx; | |
1036 | const uint16_t *stage12, *stage3, *ps2, *ps3; | |
1037 | const uint32_t *stage3b; | |
1038 | ||
1039 | uint32_t value; | |
1040 | int32_t st1, stage1Length, st2, st3, minLength; | |
1041 | ||
1042 | UChar s[UCNV_EXT_MAX_UCHARS]; | |
1043 | UChar32 c; | |
1044 | int32_t length; | |
1045 | ||
1046 | cx=sharedData->mbcs.extIndexes; | |
1047 | if(cx==NULL) { | |
1048 | return; | |
1049 | } | |
1050 | ||
1051 | stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); | |
1052 | stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); | |
1053 | stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); | |
1054 | ||
1055 | stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; | |
1056 | ||
1057 | /* enumerate the from-Unicode trie table */ | |
1058 | c=0; /* keep track of the current code point while enumerating */ | |
1059 | ||
46f4442e A |
1060 | if(filter==UCNV_SET_FILTER_2022_CN) { |
1061 | minLength=3; | |
1062 | } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || | |
1063 | filter!=UCNV_SET_FILTER_NONE | |
1064 | ) { | |
374ca955 A |
1065 | /* DBCS-only, ignore single-byte results */ |
1066 | minLength=2; | |
1067 | } else { | |
1068 | minLength=1; | |
1069 | } | |
1070 | ||
1071 | /* | |
1072 | * the trie enumeration is almost the same as | |
1073 | * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 | |
1074 | */ | |
1075 | for(st1=0; st1<stage1Length; ++st1) { | |
1076 | st2=stage12[st1]; | |
1077 | if(st2>stage1Length) { | |
1078 | ps2=stage12+st2; | |
1079 | for(st2=0; st2<64; ++st2) { | |
1080 | if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { | |
1081 | /* read the stage 3 block */ | |
1082 | ps3=stage3+st3; | |
1083 | ||
374ca955 A |
1084 | do { |
1085 | value=stage3b[*ps3++]; | |
1086 | if(value==0) { | |
1087 | /* no mapping, do nothing */ | |
1088 | } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { | |
51004dcb | 1089 | // Recurse for partial results. |
374ca955 A |
1090 | length=0; |
1091 | U16_APPEND_UNSAFE(s, length, c); | |
1092 | ucnv_extGetUnicodeSetString( | |
51004dcb | 1093 | sharedData, cx, sa, which, minLength, |
374ca955 A |
1094 | c, s, length, |
1095 | (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), | |
1096 | pErrorCode); | |
51004dcb | 1097 | } else if(extSetUseMapping(which, minLength, value)) { |
46f4442e A |
1098 | switch(filter) { |
1099 | case UCNV_SET_FILTER_2022_CN: | |
1100 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { | |
1101 | continue; | |
1102 | } | |
1103 | break; | |
1104 | case UCNV_SET_FILTER_SJIS: | |
1105 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { | |
1106 | continue; | |
1107 | } | |
1108 | break; | |
1109 | case UCNV_SET_FILTER_GR94DBCS: | |
1110 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && | |
1111 | (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && | |
1112 | (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { | |
1113 | continue; | |
1114 | } | |
1115 | break; | |
1116 | case UCNV_SET_FILTER_HZ: | |
1117 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && | |
1118 | (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && | |
1119 | (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { | |
1120 | continue; | |
1121 | } | |
1122 | break; | |
1123 | default: | |
1124 | /* | |
1125 | * UCNV_SET_FILTER_NONE, | |
1126 | * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength | |
1127 | */ | |
1128 | break; | |
1129 | } | |
374ca955 A |
1130 | sa->add(sa->set, c); |
1131 | } | |
1132 | } while((++c&0xf)!=0); | |
1133 | } else { | |
1134 | c+=16; /* empty stage 3 block */ | |
1135 | } | |
1136 | } | |
1137 | } else { | |
1138 | c+=1024; /* empty stage 2 block */ | |
1139 | } | |
1140 | } | |
1141 | } | |
1142 | ||
1143 | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |