]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ****************************************************************************** | |
3 | * | |
2ca993e8 | 4 | * Copyright (C) 2003-2016, International Business Machines |
374ca955 A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ****************************************************************************** | |
4388f060 | 8 | * file name: ucnv_ext.cpp |
374ca955 A |
9 | * encoding: US-ASCII |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003jun13 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Conversion extensions | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | ||
21 | #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
22 | ||
23 | #include "unicode/uset.h" | |
24 | #include "ucnv_bld.h" | |
25 | #include "ucnv_cnv.h" | |
26 | #include "ucnv_ext.h" | |
27 | #include "cmemory.h" | |
4388f060 | 28 | #include "uassert.h" |
374ca955 A |
29 | |
30 | /* to Unicode --------------------------------------------------------------- */ | |
31 | ||
32 | /* | |
33 | * @return lookup value for the byte, if found; else 0 | |
34 | */ | |
4388f060 | 35 | static inline uint32_t |
374ca955 A |
36 | ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { |
37 | uint32_t word0, word; | |
38 | int32_t i, start, limit; | |
39 | ||
40 | /* check the input byte against the lowest and highest section bytes */ | |
41 | start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); | |
42 | limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); | |
43 | if(byte<start || limit<byte) { | |
44 | return 0; /* the byte is out of range */ | |
45 | } | |
46 | ||
47 | if(length==((limit-start)+1)) { | |
48 | /* direct access on a linear array */ | |
49 | return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ | |
50 | } | |
51 | ||
52 | /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ | |
53 | word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); | |
54 | ||
55 | /* | |
56 | * Shift byte once instead of each section word and add 0xffffff. | |
57 | * We will compare the shifted/added byte (bbffffff) against | |
58 | * section words which have byte values in the same bit position. | |
59 | * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv | |
60 | * for all v=0..f | |
61 | * so we need not mask off the lower 24 bits of each section word. | |
62 | */ | |
63 | word=word0|UCNV_EXT_TO_U_VALUE_MASK; | |
64 | ||
65 | /* binary search */ | |
66 | start=0; | |
67 | limit=length; | |
68 | for(;;) { | |
69 | i=limit-start; | |
70 | if(i<=1) { | |
71 | break; /* done */ | |
72 | } | |
73 | /* start<limit-1 */ | |
74 | ||
75 | if(i<=4) { | |
76 | /* linear search for the last part */ | |
77 | if(word0<=toUSection[start]) { | |
78 | break; | |
79 | } | |
80 | if(++start<limit && word0<=toUSection[start]) { | |
81 | break; | |
82 | } | |
83 | if(++start<limit && word0<=toUSection[start]) { | |
84 | break; | |
85 | } | |
86 | /* always break at start==limit-1 */ | |
87 | ++start; | |
88 | break; | |
89 | } | |
90 | ||
91 | i=(start+limit)/2; | |
92 | if(word<toUSection[i]) { | |
93 | limit=i; | |
94 | } else { | |
95 | start=i; | |
96 | } | |
97 | } | |
98 | ||
99 | /* did we really find it? */ | |
100 | if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { | |
101 | return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ | |
102 | } else { | |
103 | return 0; /* not found */ | |
104 | } | |
105 | } | |
106 | ||
/*
 * TRUE if not an SI/SO stateful converter (sisoState<0),
 * or if the match length fits with the current converter state:
 * sisoState==0 requires match==1, any other non-negative state requires match!=1.
 *
 * Both arguments are fully parenthesized so that any expression
 * (for example a&b) can be passed safely.
 */
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
    ((sisoState)<0 || ((sisoState)==0) == ((match)==1))
113 | ||
114 | /* | |
115 | * this works like ucnv_extMatchFromU() except | |
116 | * - the first character is in pre | |
117 | * - no trie is used | |
118 | * - the returned matchLength is not offset by 2 | |
119 | */ | |
120 | static int32_t | |
121 | ucnv_extMatchToU(const int32_t *cx, int8_t sisoState, | |
122 | const char *pre, int32_t preLength, | |
123 | const char *src, int32_t srcLength, | |
124 | uint32_t *pMatchValue, | |
4388f060 | 125 | UBool /*useFallback*/, UBool flush) { |
374ca955 A |
126 | const uint32_t *toUTable, *toUSection; |
127 | ||
128 | uint32_t value, matchValue; | |
729e4ab9 | 129 | int32_t i, j, idx, length, matchLength; |
374ca955 A |
130 | uint8_t b; |
131 | ||
132 | if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { | |
133 | return 0; /* no extension data, no match */ | |
134 | } | |
135 | ||
136 | /* initialize */ | |
137 | toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); | |
729e4ab9 | 138 | idx=0; |
374ca955 A |
139 | |
140 | matchValue=0; | |
141 | i=j=matchLength=0; | |
142 | ||
143 | if(sisoState==0) { | |
144 | /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ | |
145 | if(preLength>1) { | |
146 | return 0; /* no match of a DBCS sequence in SBCS mode */ | |
147 | } else if(preLength==1) { | |
148 | srcLength=0; | |
149 | } else /* preLength==0 */ { | |
150 | if(srcLength>1) { | |
151 | srcLength=1; | |
152 | } | |
153 | } | |
154 | flush=TRUE; | |
155 | } | |
156 | ||
157 | /* we must not remember fallback matches when not using fallbacks */ | |
158 | ||
159 | /* match input units until there is a full match or the input is consumed */ | |
160 | for(;;) { | |
161 | /* go to the next section */ | |
729e4ab9 | 162 | toUSection=toUTable+idx; |
374ca955 A |
163 | |
164 | /* read first pair of the section */ | |
165 | value=*toUSection++; | |
166 | length=UCNV_EXT_TO_U_GET_BYTE(value); | |
167 | value=UCNV_EXT_TO_U_GET_VALUE(value); | |
168 | if( value!=0 && | |
169 | (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || | |
170 | TO_U_USE_FALLBACK(useFallback)) && | |
171 | UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) | |
172 | ) { | |
173 | /* remember longest match so far */ | |
174 | matchValue=value; | |
175 | matchLength=i+j; | |
176 | } | |
177 | ||
178 | /* match pre[] then src[] */ | |
179 | if(i<preLength) { | |
180 | b=(uint8_t)pre[i++]; | |
181 | } else if(j<srcLength) { | |
182 | b=(uint8_t)src[j++]; | |
183 | } else { | |
184 | /* all input consumed, partial match */ | |
185 | if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { | |
186 | /* | |
187 | * end of the entire input stream, stop with the longest match so far | |
188 | * or: partial match must not be longer than UCNV_EXT_MAX_BYTES | |
189 | * because it must fit into state buffers | |
190 | */ | |
191 | break; | |
192 | } else { | |
193 | /* continue with more input next time */ | |
194 | return -length; | |
195 | } | |
196 | } | |
197 | ||
198 | /* search for the current UChar */ | |
199 | value=ucnv_extFindToU(toUSection, length, b); | |
200 | if(value==0) { | |
201 | /* no match here, stop with the longest match so far */ | |
202 | break; | |
203 | } else { | |
204 | if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { | |
205 | /* partial match, continue */ | |
729e4ab9 | 206 | idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); |
374ca955 A |
207 | } else { |
208 | if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || | |
209 | TO_U_USE_FALLBACK(useFallback)) && | |
210 | UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) | |
211 | ) { | |
212 | /* full match, stop with result */ | |
213 | matchValue=value; | |
214 | matchLength=i+j; | |
215 | } else { | |
216 | /* full match on fallback not taken, stop with the longest match so far */ | |
217 | } | |
218 | break; | |
219 | } | |
220 | } | |
221 | } | |
222 | ||
223 | if(matchLength==0) { | |
224 | /* no match at all */ | |
225 | return 0; | |
226 | } | |
227 | ||
228 | /* return result */ | |
229 | *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); | |
230 | return matchLength; | |
231 | } | |
232 | ||
4388f060 | 233 | static inline void |
374ca955 A |
234 | ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, |
235 | uint32_t value, | |
236 | UChar **target, const UChar *targetLimit, | |
237 | int32_t **offsets, int32_t srcIndex, | |
238 | UErrorCode *pErrorCode) { | |
239 | /* output the result */ | |
240 | if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { | |
241 | /* output a single code point */ | |
242 | ucnv_toUWriteCodePoint( | |
243 | cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), | |
244 | target, targetLimit, | |
245 | offsets, srcIndex, | |
246 | pErrorCode); | |
247 | } else { | |
248 | /* output a string - with correct data we have resultLength>0 */ | |
249 | ucnv_toUWriteUChars( | |
250 | cnv, | |
251 | UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ | |
252 | UCNV_EXT_TO_U_GET_INDEX(value), | |
253 | UCNV_EXT_TO_U_GET_LENGTH(value), | |
254 | target, targetLimit, | |
255 | offsets, srcIndex, | |
256 | pErrorCode); | |
257 | } | |
258 | } | |
259 | ||
/*
 * SI/SO toUnicode state of a converter:
 * - MBCS_OUTPUT_2_SISO:    the converter's mode as int8_t (state 0 is for SBCS, 1 for DBCS)
 * - MBCS_OUTPUT_DBCS_ONLY: always 1
 * - any other output type: -1, the converter is not SI/SO stateful
 *
 * Note: For SI/SO stateful converters getting here,
 * cnv->mode==0 is equivalent to firstLength==1.
 */
#define UCNV_SISO_STATE(cnv) \
    ( \
        (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? \
            (int8_t)(cnv)->mode : \
            ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1) \
    )
271 | ||
272 | /* | |
273 | * target<targetLimit; set error code for overflow | |
274 | */ | |
275 | U_CFUNC UBool | |
276 | ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, | |
277 | int32_t firstLength, | |
278 | const char **src, const char *srcLimit, | |
279 | UChar **target, const UChar *targetLimit, | |
280 | int32_t **offsets, int32_t srcIndex, | |
281 | UBool flush, | |
282 | UErrorCode *pErrorCode) { | |
729e4ab9 | 283 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
284 | int32_t match; |
285 | ||
286 | /* try to match */ | |
287 | match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), | |
288 | (const char *)cnv->toUBytes, firstLength, | |
289 | *src, (int32_t)(srcLimit-*src), | |
290 | &value, | |
291 | cnv->useFallback, flush); | |
292 | if(match>0) { | |
293 | /* advance src pointer for the consumed input */ | |
294 | *src+=match-firstLength; | |
295 | ||
296 | /* write result to target */ | |
297 | ucnv_extWriteToU(cnv, cx, | |
298 | value, | |
299 | target, targetLimit, | |
300 | offsets, srcIndex, | |
301 | pErrorCode); | |
302 | return TRUE; | |
303 | } else if(match<0) { | |
304 | /* save state for partial match */ | |
305 | const char *s; | |
306 | int32_t j; | |
307 | ||
308 | /* copy the first code point */ | |
309 | s=(const char *)cnv->toUBytes; | |
310 | cnv->preToUFirstLength=(int8_t)firstLength; | |
311 | for(j=0; j<firstLength; ++j) { | |
312 | cnv->preToU[j]=*s++; | |
313 | } | |
314 | ||
315 | /* now copy the newly consumed input */ | |
316 | s=*src; | |
317 | match=-match; | |
318 | for(; j<match; ++j) { | |
319 | cnv->preToU[j]=*s++; | |
320 | } | |
321 | *src=s; /* same as *src=srcLimit; because we reached the end of input */ | |
322 | cnv->preToULength=(int8_t)match; | |
323 | return TRUE; | |
324 | } else /* match==0 no match */ { | |
325 | return FALSE; | |
326 | } | |
327 | } | |
328 | ||
329 | U_CFUNC UChar32 | |
330 | ucnv_extSimpleMatchToU(const int32_t *cx, | |
331 | const char *source, int32_t length, | |
332 | UBool useFallback) { | |
729e4ab9 | 333 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
334 | int32_t match; |
335 | ||
336 | if(length<=0) { | |
337 | return 0xffff; | |
338 | } | |
339 | ||
340 | /* try to match */ | |
341 | match=ucnv_extMatchToU(cx, -1, | |
342 | source, length, | |
343 | NULL, 0, | |
344 | &value, | |
345 | useFallback, TRUE); | |
346 | if(match==length) { | |
347 | /* write result for simple, single-character conversion */ | |
348 | if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { | |
349 | return UCNV_EXT_TO_U_GET_CODE_POINT(value); | |
350 | } | |
351 | } | |
352 | ||
353 | /* | |
354 | * return no match because | |
355 | * - match>0 && value points to string: simple conversion cannot handle multiple code points | |
356 | * - match>0 && match!=length: not all input consumed, forbidden for this function | |
357 | * - match==0: no match found in the first place | |
358 | * - match<0: partial match, not supported for simple conversion (and flush==TRUE) | |
359 | */ | |
360 | return 0xfffe; | |
361 | } | |
362 | ||
363 | /* | |
364 | * continue partial match with new input | |
365 | * never called for simple, single-character conversion | |
366 | */ | |
367 | U_CFUNC void | |
368 | ucnv_extContinueMatchToU(UConverter *cnv, | |
369 | UConverterToUnicodeArgs *pArgs, int32_t srcIndex, | |
370 | UErrorCode *pErrorCode) { | |
729e4ab9 | 371 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
372 | int32_t match, length; |
373 | ||
374 | match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), | |
375 | cnv->preToU, cnv->preToULength, | |
376 | pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), | |
377 | &value, | |
378 | cnv->useFallback, pArgs->flush); | |
379 | if(match>0) { | |
380 | if(match>=cnv->preToULength) { | |
381 | /* advance src pointer for the consumed input */ | |
382 | pArgs->source+=match-cnv->preToULength; | |
383 | cnv->preToULength=0; | |
384 | } else { | |
385 | /* the match did not use all of preToU[] - keep the rest for replay */ | |
386 | length=cnv->preToULength-match; | |
387 | uprv_memmove(cnv->preToU, cnv->preToU+match, length); | |
388 | cnv->preToULength=(int8_t)-length; | |
389 | } | |
390 | ||
391 | /* write result */ | |
392 | ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, | |
393 | value, | |
394 | &pArgs->target, pArgs->targetLimit, | |
395 | &pArgs->offsets, srcIndex, | |
396 | pErrorCode); | |
397 | } else if(match<0) { | |
398 | /* save state for partial match */ | |
399 | const char *s; | |
400 | int32_t j; | |
401 | ||
402 | /* just _append_ the newly consumed input to preToU[] */ | |
403 | s=pArgs->source; | |
404 | match=-match; | |
405 | for(j=cnv->preToULength; j<match; ++j) { | |
406 | cnv->preToU[j]=*s++; | |
407 | } | |
408 | pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ | |
409 | cnv->preToULength=(int8_t)match; | |
410 | } else /* match==0 */ { | |
411 | /* | |
412 | * no match | |
413 | * | |
414 | * We need to split the previous input into two parts: | |
415 | * | |
416 | * 1. The first codepage character is unmappable - that's how we got into | |
417 | * trying the extension data in the first place. | |
418 | * We need to move it from the preToU buffer | |
419 | * to the error buffer, set an error code, | |
420 | * and prepare the rest of the previous input for 2. | |
421 | * | |
422 | * 2. The rest of the previous input must be converted once we | |
423 | * come back from the callback for the first character. | |
424 | * At that time, we have to try again from scratch to convert | |
425 | * these input characters. | |
426 | * The replay will be handled by the ucnv.c conversion code. | |
427 | */ | |
428 | ||
429 | /* move the first codepage character to the error field */ | |
430 | uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); | |
431 | cnv->toULength=cnv->preToUFirstLength; | |
432 | ||
433 | /* move the rest up inside the buffer */ | |
434 | length=cnv->preToULength-cnv->preToUFirstLength; | |
435 | if(length>0) { | |
436 | uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); | |
437 | } | |
438 | ||
439 | /* mark preToU for replay */ | |
440 | cnv->preToULength=(int8_t)-length; | |
441 | ||
442 | /* set the error code for unassigned */ | |
443 | *pErrorCode=U_INVALID_CHAR_FOUND; | |
444 | } | |
445 | } | |
446 | ||
447 | /* from Unicode ------------------------------------------------------------- */ | |
448 | ||
51004dcb A |
449 | // Use roundtrips, "good one-way" mappings, and some normal fallbacks. |
450 | static inline UBool | |
451 | extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { | |
452 | return | |
453 | ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || | |
454 | FROM_U_USE_FALLBACK(useFallback, firstCP)) && | |
455 | (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; | |
456 | } | |
457 | ||
374ca955 A |
458 | /* |
459 | * @return index of the UChar, if found; else <0 | |
460 | */ | |
4388f060 | 461 | static inline int32_t |
374ca955 A |
462 | ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { |
463 | int32_t i, start, limit; | |
464 | ||
465 | /* binary search */ | |
466 | start=0; | |
467 | limit=length; | |
468 | for(;;) { | |
469 | i=limit-start; | |
470 | if(i<=1) { | |
471 | break; /* done */ | |
472 | } | |
473 | /* start<limit-1 */ | |
474 | ||
475 | if(i<=4) { | |
476 | /* linear search for the last part */ | |
477 | if(u<=fromUSection[start]) { | |
478 | break; | |
479 | } | |
480 | if(++start<limit && u<=fromUSection[start]) { | |
481 | break; | |
482 | } | |
483 | if(++start<limit && u<=fromUSection[start]) { | |
484 | break; | |
485 | } | |
486 | /* always break at start==limit-1 */ | |
487 | ++start; | |
488 | break; | |
489 | } | |
490 | ||
491 | i=(start+limit)/2; | |
492 | if(u<fromUSection[i]) { | |
493 | limit=i; | |
494 | } else { | |
495 | start=i; | |
496 | } | |
497 | } | |
498 | ||
499 | /* did we really find it? */ | |
500 | if(start<limit && u==fromUSection[start]) { | |
501 | return start; | |
502 | } else { | |
503 | return -1; /* not found */ | |
504 | } | |
505 | } | |
506 | ||
507 | /* | |
508 | * @param cx pointer to extension data; if NULL, returns 0 | |
509 | * @param firstCP the first code point before all the other UChars | |
510 | * @param pre UChars that must match; !initialMatch: partial match with them | |
511 | * @param preLength length of pre, >=0 | |
512 | * @param src UChars that can be used to complete a match | |
513 | * @param srcLength length of src, >=0 | |
514 | * @param pMatchValue [out] output result value for the match from the data structure | |
515 | * @param useFallback "use fallback" flag, usually from cnv->useFallback | |
516 | * @param flush TRUE if the end of the input stream is reached | |
517 | * @return >1: matched, return value=total match length (number of input units matched) | |
518 | * 1: matched, no mapping but request for <subchar1> | |
519 | * (only for the first code point) | |
520 | * 0: no match | |
521 | * <0: partial match, return value=negative total match length | |
522 | * (partial matches are never returned for flush==TRUE) | |
523 | * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) | |
524 | * the matchLength is 2 if only firstCP matched, and >2 if firstCP and | |
525 | * further code units matched | |
526 | */ | |
527 | static int32_t | |
528 | ucnv_extMatchFromU(const int32_t *cx, | |
529 | UChar32 firstCP, | |
530 | const UChar *pre, int32_t preLength, | |
531 | const UChar *src, int32_t srcLength, | |
532 | uint32_t *pMatchValue, | |
533 | UBool useFallback, UBool flush) { | |
534 | const uint16_t *stage12, *stage3; | |
535 | const uint32_t *stage3b; | |
536 | ||
537 | const UChar *fromUTableUChars, *fromUSectionUChars; | |
538 | const uint32_t *fromUTableValues, *fromUSectionValues; | |
539 | ||
540 | uint32_t value, matchValue; | |
729e4ab9 | 541 | int32_t i, j, idx, length, matchLength; |
374ca955 A |
542 | UChar c; |
543 | ||
544 | if(cx==NULL) { | |
545 | return 0; /* no extension data, no match */ | |
546 | } | |
547 | ||
548 | /* trie lookup of firstCP */ | |
729e4ab9 A |
549 | idx=firstCP>>10; /* stage 1 index */ |
550 | if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { | |
374ca955 A |
551 | return 0; /* the first code point is outside the trie */ |
552 | } | |
553 | ||
554 | stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); | |
555 | stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); | |
729e4ab9 | 556 | idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); |
374ca955 A |
557 | |
558 | stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); | |
729e4ab9 | 559 | value=stage3b[idx]; |
374ca955 A |
560 | if(value==0) { |
561 | return 0; | |
562 | } | |
563 | ||
46f4442e A |
564 | /* |
565 | * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: | |
566 | * Do not interpret values with reserved bits used, for forward compatibility, | |
567 | * and do not even remember intermediate results with reserved bits used. | |
568 | */ | |
569 | ||
374ca955 A |
570 | if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { |
571 | /* partial match, enter the loop below */ | |
729e4ab9 | 572 | idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
374ca955 A |
573 | |
574 | /* initialize */ | |
575 | fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); | |
576 | fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); | |
577 | ||
578 | matchValue=0; | |
579 | i=j=matchLength=0; | |
580 | ||
581 | /* we must not remember fallback matches when not using fallbacks */ | |
582 | ||
583 | /* match input units until there is a full match or the input is consumed */ | |
584 | for(;;) { | |
585 | /* go to the next section */ | |
729e4ab9 A |
586 | fromUSectionUChars=fromUTableUChars+idx; |
587 | fromUSectionValues=fromUTableValues+idx; | |
374ca955 A |
588 | |
589 | /* read first pair of the section */ | |
590 | length=*fromUSectionUChars++; | |
591 | value=*fromUSectionValues++; | |
51004dcb | 592 | if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { |
374ca955 A |
593 | /* remember longest match so far */ |
594 | matchValue=value; | |
595 | matchLength=2+i+j; | |
596 | } | |
597 | ||
598 | /* match pre[] then src[] */ | |
599 | if(i<preLength) { | |
600 | c=pre[i++]; | |
601 | } else if(j<srcLength) { | |
602 | c=src[j++]; | |
603 | } else { | |
604 | /* all input consumed, partial match */ | |
605 | if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { | |
606 | /* | |
607 | * end of the entire input stream, stop with the longest match so far | |
608 | * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS | |
609 | * because it must fit into state buffers | |
610 | */ | |
611 | break; | |
612 | } else { | |
613 | /* continue with more input next time */ | |
614 | return -(2+length); | |
615 | } | |
616 | } | |
617 | ||
618 | /* search for the current UChar */ | |
729e4ab9 A |
619 | idx=ucnv_extFindFromU(fromUSectionUChars, length, c); |
620 | if(idx<0) { | |
374ca955 A |
621 | /* no match here, stop with the longest match so far */ |
622 | break; | |
623 | } else { | |
729e4ab9 | 624 | value=fromUSectionValues[idx]; |
374ca955 A |
625 | if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
626 | /* partial match, continue */ | |
729e4ab9 | 627 | idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
374ca955 | 628 | } else { |
51004dcb | 629 | if(extFromUUseMapping(useFallback, value, firstCP)) { |
374ca955 A |
630 | /* full match, stop with result */ |
631 | matchValue=value; | |
632 | matchLength=2+i+j; | |
633 | } else { | |
634 | /* full match on fallback not taken, stop with the longest match so far */ | |
635 | } | |
636 | break; | |
637 | } | |
638 | } | |
639 | } | |
640 | ||
641 | if(matchLength==0) { | |
642 | /* no match at all */ | |
643 | return 0; | |
644 | } | |
645 | } else /* result from firstCP trie lookup */ { | |
51004dcb | 646 | if(extFromUUseMapping(useFallback, value, firstCP)) { |
374ca955 A |
647 | /* full match, stop with result */ |
648 | matchValue=value; | |
649 | matchLength=2; | |
650 | } else { | |
651 | /* fallback not taken */ | |
652 | return 0; | |
653 | } | |
654 | } | |
655 | ||
374ca955 A |
656 | /* return result */ |
657 | if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { | |
658 | return 1; /* assert matchLength==2 */ | |
659 | } | |
660 | ||
46f4442e | 661 | *pMatchValue=matchValue; |
374ca955 A |
662 | return matchLength; |
663 | } | |
664 | ||
46f4442e A |
665 | /* |
666 | * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits | |
667 | */ | |
4388f060 | 668 | static inline void |
374ca955 A |
669 | ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, |
670 | uint32_t value, | |
671 | char **target, const char *targetLimit, | |
672 | int32_t **offsets, int32_t srcIndex, | |
673 | UErrorCode *pErrorCode) { | |
674 | uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; | |
675 | const uint8_t *result; | |
676 | int32_t length, prevLength; | |
677 | ||
678 | length=UCNV_EXT_FROM_U_GET_LENGTH(value); | |
679 | value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); | |
680 | ||
681 | /* output the result */ | |
682 | if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { | |
683 | /* | |
684 | * Generate a byte array and then write it below. | |
685 | * This is not the fastest possible way, but it should be ok for | |
686 | * extension mappings, and it is much simpler. | |
687 | * Offset and overflow handling are only done once this way. | |
688 | */ | |
689 | uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ | |
690 | switch(length) { | |
691 | case 3: | |
692 | *p++=(uint8_t)(value>>16); | |
2ca993e8 A |
693 | U_FALLTHROUGH; |
694 | case 2: | |
374ca955 | 695 | *p++=(uint8_t)(value>>8); |
2ca993e8 A |
696 | U_FALLTHROUGH; |
697 | case 1: | |
374ca955 | 698 | *p++=(uint8_t)value; |
2ca993e8 | 699 | U_FALLTHROUGH; |
374ca955 A |
700 | default: |
701 | break; /* will never occur */ | |
702 | } | |
703 | result=buffer+1; | |
704 | } else { | |
705 | result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; | |
706 | } | |
707 | ||
708 | /* with correct data we have length>0 */ | |
709 | ||
710 | if((prevLength=cnv->fromUnicodeStatus)!=0) { | |
711 | /* handle SI/SO stateful output */ | |
712 | uint8_t shiftByte; | |
713 | ||
714 | if(prevLength>1 && length==1) { | |
715 | /* change from double-byte mode to single-byte */ | |
716 | shiftByte=(uint8_t)UCNV_SI; | |
717 | cnv->fromUnicodeStatus=1; | |
718 | } else if(prevLength==1 && length>1) { | |
719 | /* change from single-byte mode to double-byte */ | |
720 | shiftByte=(uint8_t)UCNV_SO; | |
721 | cnv->fromUnicodeStatus=2; | |
722 | } else { | |
723 | shiftByte=0; | |
724 | } | |
725 | ||
726 | if(shiftByte!=0) { | |
727 | /* prepend the shift byte to the result bytes */ | |
728 | buffer[0]=shiftByte; | |
729 | if(result!=buffer+1) { | |
730 | uprv_memcpy(buffer+1, result, length); | |
731 | } | |
732 | result=buffer; | |
733 | ++length; | |
734 | } | |
735 | } | |
736 | ||
737 | ucnv_fromUWriteBytes(cnv, (const char *)result, length, | |
738 | target, targetLimit, | |
739 | offsets, srcIndex, | |
740 | pErrorCode); | |
741 | } | |
742 | ||
743 | /* | |
744 | * target<targetLimit; set error code for overflow | |
745 | */ | |
746 | U_CFUNC UBool | |
747 | ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, | |
748 | UChar32 cp, | |
749 | const UChar **src, const UChar *srcLimit, | |
750 | char **target, const char *targetLimit, | |
751 | int32_t **offsets, int32_t srcIndex, | |
752 | UBool flush, | |
753 | UErrorCode *pErrorCode) { | |
729e4ab9 | 754 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
755 | int32_t match; |
756 | ||
757 | /* try to match */ | |
758 | match=ucnv_extMatchFromU(cx, cp, | |
759 | NULL, 0, | |
760 | *src, (int32_t)(srcLimit-*src), | |
761 | &value, | |
762 | cnv->useFallback, flush); | |
763 | ||
764 | /* reject a match if the result is a single byte for DBCS-only */ | |
765 | if( match>=2 && | |
766 | !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && | |
767 | cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) | |
768 | ) { | |
769 | /* advance src pointer for the consumed input */ | |
770 | *src+=match-2; /* remove 2 for the initial code point */ | |
771 | ||
772 | /* write result to target */ | |
773 | ucnv_extWriteFromU(cnv, cx, | |
774 | value, | |
775 | target, targetLimit, | |
776 | offsets, srcIndex, | |
777 | pErrorCode); | |
778 | return TRUE; | |
779 | } else if(match<0) { | |
780 | /* save state for partial match */ | |
781 | const UChar *s; | |
782 | int32_t j; | |
783 | ||
784 | /* copy the first code point */ | |
785 | cnv->preFromUFirstCP=cp; | |
786 | ||
787 | /* now copy the newly consumed input */ | |
788 | s=*src; | |
789 | match=-match-2; /* remove 2 for the initial code point */ | |
790 | for(j=0; j<match; ++j) { | |
791 | cnv->preFromU[j]=*s++; | |
792 | } | |
793 | *src=s; /* same as *src=srcLimit; because we reached the end of input */ | |
794 | cnv->preFromULength=(int8_t)match; | |
795 | return TRUE; | |
796 | } else if(match==1) { | |
797 | /* matched, no mapping but request for <subchar1> */ | |
798 | cnv->useSubChar1=TRUE; | |
799 | return FALSE; | |
800 | } else /* match==0 no match */ { | |
801 | return FALSE; | |
802 | } | |
803 | } | |
804 | ||
46f4442e A |
805 | /* |
806 | * Used by ISO 2022 implementation. | |
807 | * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping | |
808 | */ | |
374ca955 A |
809 | U_CFUNC int32_t |
810 | ucnv_extSimpleMatchFromU(const int32_t *cx, | |
811 | UChar32 cp, uint32_t *pValue, | |
812 | UBool useFallback) { | |
813 | uint32_t value; | |
814 | int32_t match; | |
815 | ||
816 | /* try to match */ | |
817 | match=ucnv_extMatchFromU(cx, | |
818 | cp, | |
819 | NULL, 0, | |
820 | NULL, 0, | |
821 | &value, | |
822 | useFallback, TRUE); | |
823 | if(match>=2) { | |
824 | /* write result for simple, single-character conversion */ | |
825 | int32_t length; | |
46f4442e A |
826 | int isRoundtrip; |
827 | ||
828 | isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); | |
374ca955 A |
829 | length=UCNV_EXT_FROM_U_GET_LENGTH(value); |
830 | value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); | |
831 | ||
832 | if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { | |
833 | *pValue=value; | |
46f4442e | 834 | return isRoundtrip ? length : -length; |
374ca955 A |
835 | #if 0 /* not currently used */ |
836 | } else if(length==4) { | |
837 | /* de-serialize a 4-byte result */ | |
838 | const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; | |
839 | *pValue= | |
840 | ((uint32_t)result[0]<<24)| | |
841 | ((uint32_t)result[1]<<16)| | |
842 | ((uint32_t)result[2]<<8)| | |
843 | result[3]; | |
46f4442e | 844 | return isRoundtrip ? 4 : -4; |
374ca955 A |
845 | #endif |
846 | } | |
847 | } | |
848 | ||
849 | /* | |
850 | * return no match because | |
851 | * - match>1 && resultLength>4: result too long for simple conversion | |
852 | * - match==1: no match found, <subchar1> preferred | |
853 | * - match==0: no match found in the first place | |
854 | * - match<0: partial match, not supported for simple conversion (and flush==TRUE) | |
855 | */ | |
856 | return 0; | |
857 | } | |
858 | ||
859 | /* | |
860 | * continue partial match with new input, requires cnv->preFromUFirstCP>=0 | |
861 | * never called for simple, single-character conversion | |
862 | */ | |
863 | U_CFUNC void | |
864 | ucnv_extContinueMatchFromU(UConverter *cnv, | |
865 | UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, | |
866 | UErrorCode *pErrorCode) { | |
729e4ab9 | 867 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
374ca955 A |
868 | int32_t match; |
869 | ||
870 | match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, | |
871 | cnv->preFromUFirstCP, | |
872 | cnv->preFromU, cnv->preFromULength, | |
873 | pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), | |
874 | &value, | |
875 | cnv->useFallback, pArgs->flush); | |
876 | if(match>=2) { | |
877 | match-=2; /* remove 2 for the initial code point */ | |
878 | ||
879 | if(match>=cnv->preFromULength) { | |
880 | /* advance src pointer for the consumed input */ | |
881 | pArgs->source+=match-cnv->preFromULength; | |
882 | cnv->preFromULength=0; | |
883 | } else { | |
884 | /* the match did not use all of preFromU[] - keep the rest for replay */ | |
885 | int32_t length=cnv->preFromULength-match; | |
a62d09fc | 886 | u_memmove(cnv->preFromU, cnv->preFromU+match, length); |
374ca955 A |
887 | cnv->preFromULength=(int8_t)-length; |
888 | } | |
889 | ||
890 | /* finish the partial match */ | |
891 | cnv->preFromUFirstCP=U_SENTINEL; | |
892 | ||
893 | /* write result */ | |
894 | ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, | |
895 | value, | |
896 | &pArgs->target, pArgs->targetLimit, | |
897 | &pArgs->offsets, srcIndex, | |
898 | pErrorCode); | |
899 | } else if(match<0) { | |
900 | /* save state for partial match */ | |
901 | const UChar *s; | |
902 | int32_t j; | |
903 | ||
904 | /* just _append_ the newly consumed input to preFromU[] */ | |
905 | s=pArgs->source; | |
906 | match=-match-2; /* remove 2 for the initial code point */ | |
907 | for(j=cnv->preFromULength; j<match; ++j) { | |
4388f060 | 908 | U_ASSERT(j>=0); |
374ca955 A |
909 | cnv->preFromU[j]=*s++; |
910 | } | |
911 | pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ | |
912 | cnv->preFromULength=(int8_t)match; | |
913 | } else /* match==0 or 1 */ { | |
914 | /* | |
915 | * no match | |
916 | * | |
917 | * We need to split the previous input into two parts: | |
918 | * | |
919 | * 1. The first code point is unmappable - that's how we got into | |
920 | * trying the extension data in the first place. | |
921 | * We need to move it from the preFromU buffer | |
922 | * to the error buffer, set an error code, | |
923 | * and prepare the rest of the previous input for 2. | |
924 | * | |
925 | * 2. The rest of the previous input must be converted once we | |
926 | * come back from the callback for the first code point. | |
927 | * At that time, we have to try again from scratch to convert | |
928 | * these input characters. | |
929 | * The replay will be handled by the ucnv.c conversion code. | |
930 | */ | |
931 | ||
932 | if(match==1) { | |
933 | /* matched, no mapping but request for <subchar1> */ | |
934 | cnv->useSubChar1=TRUE; | |
935 | } | |
936 | ||
937 | /* move the first code point to the error field */ | |
938 | cnv->fromUChar32=cnv->preFromUFirstCP; | |
939 | cnv->preFromUFirstCP=U_SENTINEL; | |
940 | ||
941 | /* mark preFromU for replay */ | |
942 | cnv->preFromULength=-cnv->preFromULength; | |
943 | ||
944 | /* set the error code for unassigned */ | |
945 | *pErrorCode=U_INVALID_CHAR_FOUND; | |
946 | } | |
947 | } | |
948 | ||
51004dcb A |
949 | static UBool |
950 | extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { | |
951 | if(which==UCNV_ROUNDTRIP_SET) { | |
952 | // Add only code points for which the roundtrip flag is set. | |
953 | // Do not add any fallbacks, even if ucnv_fromUnicode() would use them | |
954 | // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). | |
955 | // | |
956 | // By analogy, also do not add "good one-way" mappings. | |
957 | // | |
958 | // Do not add entries with reserved bits set. | |
959 | if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= | |
960 | UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { | |
961 | return FALSE; | |
962 | } | |
963 | } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { | |
964 | // Do not add entries with reserved bits set. | |
965 | if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { | |
966 | return FALSE; | |
967 | } | |
968 | } | |
969 | // Do not add <subchar1> entries or other (future?) pseudo-entries | |
970 | // with an output length of 0. | |
971 | return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; | |
972 | } | |
973 | ||
374ca955 A |
974 | static void |
975 | ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, | |
976 | const int32_t *cx, | |
73c04bcf | 977 | const USetAdder *sa, |
51004dcb | 978 | UConverterUnicodeSet which, |
374ca955 | 979 | int32_t minLength, |
51004dcb | 980 | UChar32 firstCP, |
374ca955 A |
981 | UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, |
982 | int32_t sectionIndex, | |
983 | UErrorCode *pErrorCode) { | |
984 | const UChar *fromUSectionUChars; | |
985 | const uint32_t *fromUSectionValues; | |
986 | ||
987 | uint32_t value; | |
988 | int32_t i, count; | |
989 | ||
990 | fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; | |
991 | fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; | |
992 | ||
993 | /* read first pair of the section */ | |
994 | count=*fromUSectionUChars++; | |
995 | value=*fromUSectionValues++; | |
996 | ||
51004dcb A |
997 | if(extSetUseMapping(which, minLength, value)) { |
998 | if(length==U16_LENGTH(firstCP)) { | |
374ca955 | 999 | /* add the initial code point */ |
51004dcb | 1000 | sa->add(sa->set, firstCP); |
374ca955 A |
1001 | } else { |
1002 | /* add the string so far */ | |
1003 | sa->addString(sa->set, s, length); | |
1004 | } | |
1005 | } | |
1006 | ||
1007 | for(i=0; i<count; ++i) { | |
1008 | /* append this code unit and recurse or add the string */ | |
1009 | s[length]=fromUSectionUChars[i]; | |
1010 | value=fromUSectionValues[i]; | |
1011 | ||
1012 | if(value==0) { | |
1013 | /* no mapping, do nothing */ | |
1014 | } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { | |
1015 | ucnv_extGetUnicodeSetString( | |
51004dcb A |
1016 | sharedData, cx, sa, which, minLength, |
1017 | firstCP, s, length+1, | |
374ca955 A |
1018 | (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), |
1019 | pErrorCode); | |
51004dcb | 1020 | } else if(extSetUseMapping(which, minLength, value)) { |
374ca955 A |
1021 | sa->addString(sa->set, s, length+1); |
1022 | } | |
1023 | } | |
1024 | } | |
1025 | ||
1026 | U_CFUNC void | |
1027 | ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, | |
73c04bcf | 1028 | const USetAdder *sa, |
374ca955 | 1029 | UConverterUnicodeSet which, |
46f4442e | 1030 | UConverterSetFilter filter, |
374ca955 A |
1031 | UErrorCode *pErrorCode) { |
1032 | const int32_t *cx; | |
1033 | const uint16_t *stage12, *stage3, *ps2, *ps3; | |
1034 | const uint32_t *stage3b; | |
1035 | ||
1036 | uint32_t value; | |
1037 | int32_t st1, stage1Length, st2, st3, minLength; | |
1038 | ||
1039 | UChar s[UCNV_EXT_MAX_UCHARS]; | |
1040 | UChar32 c; | |
1041 | int32_t length; | |
1042 | ||
1043 | cx=sharedData->mbcs.extIndexes; | |
1044 | if(cx==NULL) { | |
1045 | return; | |
1046 | } | |
1047 | ||
1048 | stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); | |
1049 | stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); | |
1050 | stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); | |
1051 | ||
1052 | stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; | |
1053 | ||
1054 | /* enumerate the from-Unicode trie table */ | |
1055 | c=0; /* keep track of the current code point while enumerating */ | |
1056 | ||
46f4442e A |
1057 | if(filter==UCNV_SET_FILTER_2022_CN) { |
1058 | minLength=3; | |
1059 | } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || | |
1060 | filter!=UCNV_SET_FILTER_NONE | |
1061 | ) { | |
374ca955 A |
1062 | /* DBCS-only, ignore single-byte results */ |
1063 | minLength=2; | |
1064 | } else { | |
1065 | minLength=1; | |
1066 | } | |
1067 | ||
1068 | /* | |
1069 | * the trie enumeration is almost the same as | |
1070 | * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 | |
1071 | */ | |
1072 | for(st1=0; st1<stage1Length; ++st1) { | |
1073 | st2=stage12[st1]; | |
1074 | if(st2>stage1Length) { | |
1075 | ps2=stage12+st2; | |
1076 | for(st2=0; st2<64; ++st2) { | |
1077 | if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { | |
1078 | /* read the stage 3 block */ | |
1079 | ps3=stage3+st3; | |
1080 | ||
374ca955 A |
1081 | do { |
1082 | value=stage3b[*ps3++]; | |
1083 | if(value==0) { | |
1084 | /* no mapping, do nothing */ | |
1085 | } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { | |
51004dcb | 1086 | // Recurse for partial results. |
374ca955 A |
1087 | length=0; |
1088 | U16_APPEND_UNSAFE(s, length, c); | |
1089 | ucnv_extGetUnicodeSetString( | |
51004dcb | 1090 | sharedData, cx, sa, which, minLength, |
374ca955 A |
1091 | c, s, length, |
1092 | (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), | |
1093 | pErrorCode); | |
51004dcb | 1094 | } else if(extSetUseMapping(which, minLength, value)) { |
46f4442e A |
1095 | switch(filter) { |
1096 | case UCNV_SET_FILTER_2022_CN: | |
1097 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { | |
1098 | continue; | |
1099 | } | |
1100 | break; | |
1101 | case UCNV_SET_FILTER_SJIS: | |
1102 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { | |
1103 | continue; | |
1104 | } | |
1105 | break; | |
1106 | case UCNV_SET_FILTER_GR94DBCS: | |
1107 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && | |
1108 | (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && | |
1109 | (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { | |
1110 | continue; | |
1111 | } | |
1112 | break; | |
1113 | case UCNV_SET_FILTER_HZ: | |
1114 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && | |
1115 | (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && | |
1116 | (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { | |
1117 | continue; | |
1118 | } | |
1119 | break; | |
1120 | default: | |
1121 | /* | |
1122 | * UCNV_SET_FILTER_NONE, | |
1123 | * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength | |
1124 | */ | |
1125 | break; | |
1126 | } | |
374ca955 A |
1127 | sa->add(sa->set, c); |
1128 | } | |
1129 | } while((++c&0xf)!=0); | |
1130 | } else { | |
1131 | c+=16; /* empty stage 3 block */ | |
1132 | } | |
1133 | } | |
1134 | } else { | |
1135 | c+=1024; /* empty stage 2 block */ | |
1136 | } | |
1137 | } | |
1138 | } | |
1139 | ||
1140 | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |