]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ********************************************************************** | |
b331163b | 3 | * Copyright (C) 2000-2015, International Business Machines |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * file name: ucnvhz.c | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2000oct16 | |
12 | * created by: Ram Viswanadha | |
13 | * 10/31/2000 Ram Implemented offsets logic function | |
14 | * | |
15 | */ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
b331163b | 19 | #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f A |
20 | |
21 | #include "cmemory.h" | |
b75a7d8f A |
22 | #include "unicode/ucnv.h" |
23 | #include "unicode/ucnv_cb.h" | |
24 | #include "unicode/uset.h" | |
4388f060 | 25 | #include "unicode/utf16.h" |
b75a7d8f A |
26 | #include "ucnv_bld.h" |
27 | #include "ucnv_cnv.h" | |
729e4ab9 | 28 | #include "ucnv_imp.h" |
b75a7d8f A |
29 | |
/* Byte values of the HZ escape character and of the two mode-switch characters. */
#define UCNV_TILDE 0x7E /* ~ */
#define UCNV_OPEN_BRACE 0x7B /* { */
#define UCNV_CLOSE_BRACE 0x7D /* } */
/*
 * Escape sequences emitted by the fromUnicode path, each ESC_LEN bytes long.
 * They are spelled with hex escapes, i.e. as fixed byte values rather than
 * as character literals of the compiler's execution character set.
 */
#define SB_ESCAPE "\x7E\x7D" /* ~} : written when shifting to single-byte output */
#define DB_ESCAPE "\x7E\x7B" /* ~{ : written when shifting to double-byte output */
#define TILDE_ESCAPE "\x7E\x7E" /* ~~ : written for a '~' in the input */
#define ESC_LEN 2 /* number of bytes in each escape sequence above */
37 | ||
38 | ||
/*
 * Appends len bytes, read from strToAppend, to the output of a fromUnicode call.
 *
 * Each byte goes to args->target[targetIndex] while targetIndex < targetLength;
 * once the target is full the remaining bytes are stored in the converter's
 * charErrorBuffer and *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * targetIndex, strToAppend and len must be modifiable lvalues: targetIndex is
 * advanced once per byte written to the target, strToAppend is advanced once
 * per byte, and len is counted down (it is left at -1).
 *
 * NOTE: besides its parameters, the macro uses a variable named `offsets`
 * that must exist in the calling scope. When args->offsets is non-NULL,
 * sourceIndex-1 is stored through that variable, which is then advanced.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by
 * ';' is exactly one statement and can be used in an unbraced if/else.
 */
#define CONCAT_ESCAPE_MACRO(args, targetIndex, targetLength, strToAppend, err, len, sourceIndex) \
    do { \
        while((len)-- > 0) { \
            if((targetIndex) < (targetLength)) { \
                (args)->target[(targetIndex)] = (unsigned char) *(strToAppend); \
                if((args)->offsets != NULL) { \
                    *(offsets++) = (sourceIndex) - 1; \
                } \
                (targetIndex)++; \
            } else { \
                (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = \
                    (unsigned char) *(strToAppend); \
                *(err) = U_BUFFER_OVERFLOW_ERROR; \
            } \
            (strToAppend)++; \
        } \
    } while(0)
55 | ||
56 | ||
/*
 * Per-converter HZ state. An instance is allocated (zero-filled) in _HZOpen()
 * and stored in UConverter.extraInfo.
 */
typedef struct{
    UConverter* gbConverter;    /* sub-converter opened as "GBK" in _HZOpen(); its shared data does the double-byte mapping */
    int32_t targetIndex;        /* zeroed by _HZReset(); not read by the conversion functions in this file */
    int32_t sourceIndex;        /* zeroed by _HZReset(); not read by the conversion functions in this file */
    UBool isEscapeAppended;     /* fromUnicode: TRUE once a ~{ or ~} escape has been written */
    UBool isStateDBCS;          /* toUnicode: TRUE after ~{ has been read, FALSE after ~} */
    UBool isTargetUCharDBCS;    /* fromUnicode: whether the most recently converted character was double-byte */
    UBool isEmptySegment;       /* toUnicode: TRUE right after ~{ or ~}, cleared by any other input */
}UConverterDataHZ;
66 | ||
67 | ||
68 | ||
69 | static void | |
729e4ab9 A |
70 | _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
71 | UConverter *gbConverter; | |
72 | if(pArgs->onlyTestIsLoadable) { | |
73 | ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ | |
74 | return; | |
75 | } | |
76 | gbConverter = ucnv_open("GBK", errorCode); | |
77 | if(U_FAILURE(*errorCode)) { | |
78 | return; | |
79 | } | |
b75a7d8f A |
80 | cnv->toUnicodeStatus = 0; |
81 | cnv->fromUnicodeStatus= 0; | |
82 | cnv->mode=0; | |
374ca955 | 83 | cnv->fromUChar32=0x0000; |
4388f060 | 84 | cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); |
b75a7d8f | 85 | if(cnv->extraInfo != NULL){ |
729e4ab9 | 86 | ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; |
b75a7d8f | 87 | } |
b75a7d8f | 88 | else { |
729e4ab9 | 89 | ucnv_close(gbConverter); |
b75a7d8f A |
90 | *errorCode = U_MEMORY_ALLOCATION_ERROR; |
91 | return; | |
92 | } | |
93 | } | |
94 | ||
95 | static void | |
96 | _HZClose(UConverter *cnv){ | |
97 | if(cnv->extraInfo != NULL) { | |
98 | ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); | |
99 | if(!cnv->isExtraLocal) { | |
100 | uprv_free(cnv->extraInfo); | |
101 | } | |
102 | cnv->extraInfo = NULL; | |
103 | } | |
104 | } | |
105 | ||
106 | static void | |
107 | _HZReset(UConverter *cnv, UConverterResetChoice choice){ | |
108 | if(choice<=UCNV_RESET_TO_UNICODE) { | |
109 | cnv->toUnicodeStatus = 0; | |
110 | cnv->mode=0; | |
111 | if(cnv->extraInfo != NULL){ | |
112 | ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; | |
d5d484b0 | 113 | ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; |
b75a7d8f A |
114 | } |
115 | } | |
116 | if(choice!=UCNV_RESET_TO_UNICODE) { | |
117 | cnv->fromUnicodeStatus= 0; | |
374ca955 | 118 | cnv->fromUChar32=0x0000; |
b75a7d8f A |
119 | if(cnv->extraInfo != NULL){ |
120 | ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; | |
121 | ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; | |
122 | ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; | |
123 | ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; | |
124 | } | |
125 | } | |
126 | } | |
127 | ||
128 | /**************************************HZ Encoding************************************************* | |
129 | * Rules for HZ encoding | |
130 | * | |
131 | * In ASCII mode, a byte is interpreted as an ASCII character, unless a | |
132 | * '~' is encountered. The character '~' is an escape character. By | |
133 | * convention, it must be immediately followed ONLY by '~', '{' or '\n' | |
134 | * (<LF>), with the following special meaning. | |
135 | ||
136 | * 1. The escape sequence '~~' is interpreted as a '~'. | |
137 | * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. | |
138 | * 3. The escape sequence '~\n' is a line-continuation marker to be | |
139 | * consumed with no output produced. | |
140 | * In GB mode, characters are interpreted two bytes at a time as (pure) | |
141 | * GB codes until the escape-from-GB code '~}' is read. This code | |
142 | * switches the mode from GB back to ASCII. (Note that the escape- | |
143 | * from-GB code '~}' ($7E7D) is outside the defined GB range.) | |
144 | * | |
145 | * Source: RFC 1842 | |
46f4442e A |
146 | * |
147 | * Note that the formal syntax in RFC 1842 is invalid. I assume that the | |
148 | * intended definition of single-byte-segment is as follows (pedberg): | |
149 | * single-byte-segment = single-byte-seq 1*single-byte-char | |
b75a7d8f A |
150 | */ |
151 | ||
152 | ||
153 | static void | |
154 | UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, | |
155 | UErrorCode* err){ | |
374ca955 | 156 | char tempBuf[2]; |
b75a7d8f A |
157 | const char *mySource = ( char *) args->source; |
158 | UChar *myTarget = args->target; | |
b75a7d8f A |
159 | const char *mySourceLimit = args->sourceLimit; |
160 | UChar32 targetUniChar = 0x0000; | |
fd0068a8 | 161 | int32_t mySourceChar = 0x0000; |
b75a7d8f | 162 | UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); |
73c04bcf A |
163 | tempBuf[0]=0; |
164 | tempBuf[1]=0; | |
46f4442e A |
165 | |
166 | /* Calling code already handles this situation. */ | |
167 | /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ | |
b75a7d8f A |
168 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
169 | return; | |
46f4442e | 170 | }*/ |
b75a7d8f | 171 | |
374ca955 | 172 | while(mySource< mySourceLimit){ |
b75a7d8f A |
173 | |
174 | if(myTarget < args->targetLimit){ | |
175 | ||
176 | mySourceChar= (unsigned char) *mySource++; | |
46f4442e | 177 | |
fd0068a8 A |
178 | if(args->converter->mode == UCNV_TILDE) { |
179 | /* second byte after ~ */ | |
180 | args->converter->mode=0; | |
181 | switch(mySourceChar) { | |
46f4442e A |
182 | case 0x0A: |
183 | /* no output for ~\n (line-continuation marker) */ | |
184 | continue; | |
185 | case UCNV_TILDE: | |
186 | if(args->offsets) { | |
187 | args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); | |
188 | } | |
189 | *(myTarget++)=(UChar)mySourceChar; | |
190 | myData->isEmptySegment = FALSE; | |
191 | continue; | |
192 | case UCNV_OPEN_BRACE: | |
193 | case UCNV_CLOSE_BRACE: | |
194 | myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); | |
195 | if (myData->isEmptySegment) { | |
196 | myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ | |
197 | *err = U_ILLEGAL_ESCAPE_SEQUENCE; | |
198 | args->converter->toUCallbackReason = UCNV_IRREGULAR; | |
199 | args->converter->toUBytes[0] = UCNV_TILDE; | |
200 | args->converter->toUBytes[1] = mySourceChar; | |
201 | args->converter->toULength = 2; | |
202 | args->target = myTarget; | |
203 | args->source = mySource; | |
204 | return; | |
205 | } | |
206 | myData->isEmptySegment = TRUE; | |
207 | continue; | |
208 | default: | |
209 | /* if the first byte is equal to TILDE and the trail byte | |
210 | * is not a valid byte then it is an error condition | |
211 | */ | |
212 | /* | |
213 | * Ticket 5691: consistent illegal sequences: | |
214 | * - We include at least the first byte in the illegal sequence. | |
215 | * - If any of the non-initial bytes could be the start of a character, | |
216 | * we stop the illegal sequence before the first one of those. | |
217 | */ | |
218 | myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ | |
219 | *err = U_ILLEGAL_ESCAPE_SEQUENCE; | |
220 | args->converter->toUBytes[0] = UCNV_TILDE; | |
221 | if( myData->isStateDBCS ? | |
222 | (0x21 <= mySourceChar && mySourceChar <= 0x7e) : | |
223 | mySourceChar <= 0x7f | |
224 | ) { | |
225 | /* The current byte could be the start of a character: Back it out. */ | |
226 | args->converter->toULength = 1; | |
227 | --mySource; | |
228 | } else { | |
229 | /* Include the current byte in the illegal sequence. */ | |
230 | args->converter->toUBytes[1] = mySourceChar; | |
231 | args->converter->toULength = 2; | |
232 | } | |
233 | args->target = myTarget; | |
234 | args->source = mySource; | |
235 | return; | |
fd0068a8 A |
236 | } |
237 | } else if(myData->isStateDBCS) { | |
238 | if(args->converter->toUnicodeStatus == 0x00){ | |
239 | /* lead byte */ | |
240 | if(mySourceChar == UCNV_TILDE) { | |
b75a7d8f | 241 | args->converter->mode = UCNV_TILDE; |
fd0068a8 A |
242 | } else { |
243 | /* add another bit to distinguish a 0 byte from not having seen a lead byte */ | |
244 | args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); | |
245 | myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ | |
b75a7d8f | 246 | } |
b75a7d8f A |
247 | continue; |
248 | } | |
249 | else{ | |
fd0068a8 | 250 | /* trail byte */ |
fd0068a8 A |
251 | int leadIsOk, trailIsOk; |
252 | uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; | |
253 | targetUniChar = 0xffff; | |
254 | /* | |
255 | * Ticket 5691: consistent illegal sequences: | |
256 | * - We include at least the first byte in the illegal sequence. | |
257 | * - If any of the non-initial bytes could be the start of a character, | |
258 | * we stop the illegal sequence before the first one of those. | |
259 | * | |
260 | * In HZ DBCS, if the second byte is in the 21..7e range, | |
261 | * we report only the first byte as the illegal sequence. | |
262 | * Otherwise we convert or report the pair of bytes. | |
263 | */ | |
264 | leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); | |
265 | trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); | |
266 | if (leadIsOk && trailIsOk) { | |
267 | tempBuf[0] = (char) (leadByte+0x80) ; | |
268 | tempBuf[1] = (char) (mySourceChar+0x80); | |
269 | targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, | |
270 | tempBuf, 2, args->converter->useFallback); | |
271 | mySourceChar= (leadByte << 8) | mySourceChar; | |
272 | } else if (trailIsOk) { | |
273 | /* report a single illegal byte and continue with the following DBCS starter byte */ | |
274 | --mySource; | |
275 | mySourceChar = (int32_t)leadByte; | |
276 | } else { | |
277 | /* report a pair of illegal bytes if the second byte is not a DBCS starter */ | |
278 | /* add another bit so that the code below writes 2 bytes in case of error */ | |
279 | mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; | |
280 | } | |
281 | args->converter->toUnicodeStatus =0x00; | |
b75a7d8f A |
282 | } |
283 | } | |
284 | else{ | |
fd0068a8 A |
285 | if(mySourceChar == UCNV_TILDE) { |
286 | args->converter->mode = UCNV_TILDE; | |
287 | continue; | |
288 | } else if(mySourceChar <= 0x7f) { | |
289 | targetUniChar = (UChar)mySourceChar; /* ASCII */ | |
290 | myData->isEmptySegment = FALSE; /* the segment has something valid */ | |
291 | } else { | |
292 | targetUniChar = 0xffff; | |
293 | myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ | |
b75a7d8f | 294 | } |
b75a7d8f A |
295 | } |
296 | if(targetUniChar < 0xfffe){ | |
297 | if(args->offsets) { | |
298 | args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); | |
299 | } | |
300 | ||
301 | *(myTarget++)=(UChar)targetUniChar; | |
302 | } | |
fd0068a8 | 303 | else /* targetUniChar>=0xfffe */ { |
374ca955 A |
304 | if(targetUniChar == 0xfffe){ |
305 | *err = U_INVALID_CHAR_FOUND; | |
306 | } | |
307 | else{ | |
308 | *err = U_ILLEGAL_CHAR_FOUND; | |
309 | } | |
fd0068a8 A |
310 | if(mySourceChar > 0xff){ |
311 | args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); | |
312 | args->converter->toUBytes[1] = (uint8_t)mySourceChar; | |
46f4442e A |
313 | args->converter->toULength=2; |
314 | } | |
374ca955 A |
315 | else{ |
316 | args->converter->toUBytes[0] = (uint8_t)mySourceChar; | |
317 | args->converter->toULength=1; | |
b75a7d8f | 318 | } |
374ca955 | 319 | break; |
b75a7d8f A |
320 | } |
321 | } | |
322 | else{ | |
323 | *err =U_BUFFER_OVERFLOW_ERROR; | |
324 | break; | |
325 | } | |
326 | } | |
fd0068a8 | 327 | |
b75a7d8f A |
328 | args->target = myTarget; |
329 | args->source = mySource; | |
330 | } | |
331 | ||
332 | ||
333 | static void | |
334 | UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, | |
335 | UErrorCode * err){ | |
336 | const UChar *mySource = args->source; | |
374ca955 | 337 | char *myTarget = args->target; |
b75a7d8f A |
338 | int32_t* offsets = args->offsets; |
339 | int32_t mySourceIndex = 0; | |
340 | int32_t myTargetIndex = 0; | |
374ca955 | 341 | int32_t targetLength = (int32_t)(args->targetLimit - myTarget); |
b75a7d8f | 342 | int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); |
b75a7d8f | 343 | uint32_t targetUniChar = 0x0000; |
73c04bcf | 344 | UChar32 mySourceChar = 0x0000; |
b75a7d8f A |
345 | UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; |
346 | UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; | |
b331163b | 347 | UBool oldIsTargetUCharDBCS; |
b75a7d8f A |
348 | int len =0; |
349 | const char* escSeq=NULL; | |
350 | ||
46f4442e A |
351 | /* Calling code already handles this situation. */ |
352 | /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ | |
b75a7d8f A |
353 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
354 | return; | |
46f4442e | 355 | }*/ |
374ca955 | 356 | if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { |
b75a7d8f A |
357 | goto getTrail; |
358 | } | |
359 | /*writing the char to the output stream */ | |
360 | while (mySourceIndex < mySourceLength){ | |
361 | targetUniChar = missingCharMarker; | |
362 | if (myTargetIndex < targetLength){ | |
363 | ||
73c04bcf | 364 | mySourceChar = (UChar) mySource[mySourceIndex++]; |
b75a7d8f A |
365 | |
366 | ||
367 | oldIsTargetUCharDBCS = isTargetUCharDBCS; | |
368 | if(mySourceChar ==UCNV_TILDE){ | |
369 | /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ | |
370 | len = ESC_LEN; | |
371 | escSeq = TILDE_ESCAPE; | |
372 | CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); | |
373 | continue; | |
46f4442e | 374 | } else if(mySourceChar <= 0x7f) { |
46f4442e A |
375 | targetUniChar = mySourceChar; |
376 | } else { | |
b331163b | 377 | int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, |
b75a7d8f | 378 | mySourceChar,&targetUniChar,args->converter->useFallback); |
46f4442e A |
379 | /* we can only use lead bytes 21..7D and trail bytes 21..7E */ |
380 | if( length == 2 && | |
381 | (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && | |
382 | (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) | |
383 | ) { | |
384 | targetUniChar -= 0x8080; | |
385 | } else { | |
386 | targetUniChar = missingCharMarker; | |
387 | } | |
b75a7d8f A |
388 | } |
389 | if (targetUniChar != missingCharMarker){ | |
390 | myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); | |
391 | if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ | |
392 | /*Shifting from a double byte to single byte mode*/ | |
393 | if(!isTargetUCharDBCS){ | |
394 | len =ESC_LEN; | |
395 | escSeq = SB_ESCAPE; | |
396 | CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); | |
73c04bcf | 397 | myConverterData->isEscapeAppended = TRUE; |
b75a7d8f A |
398 | } |
399 | else{ /* Shifting from a single byte to double byte mode*/ | |
400 | len =ESC_LEN; | |
401 | escSeq = DB_ESCAPE; | |
402 | CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); | |
73c04bcf | 403 | myConverterData->isEscapeAppended = TRUE; |
b75a7d8f A |
404 | |
405 | } | |
406 | } | |
407 | ||
408 | if(isTargetUCharDBCS){ | |
409 | if( myTargetIndex <targetLength){ | |
46f4442e | 410 | myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); |
b75a7d8f A |
411 | if(offsets){ |
412 | *(offsets++) = mySourceIndex-1; | |
413 | } | |
414 | if(myTargetIndex < targetLength){ | |
46f4442e | 415 | myTarget[myTargetIndex++] =(char) targetUniChar; |
b75a7d8f A |
416 | if(offsets){ |
417 | *(offsets++) = mySourceIndex-1; | |
418 | } | |
419 | }else{ | |
46f4442e | 420 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
b75a7d8f A |
421 | *err = U_BUFFER_OVERFLOW_ERROR; |
422 | } | |
423 | }else{ | |
46f4442e A |
424 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); |
425 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; | |
b75a7d8f A |
426 | *err = U_BUFFER_OVERFLOW_ERROR; |
427 | } | |
428 | ||
429 | }else{ | |
430 | if( myTargetIndex <targetLength){ | |
374ca955 | 431 | myTarget[myTargetIndex++] = (char) (targetUniChar ); |
b75a7d8f A |
432 | if(offsets){ |
433 | *(offsets++) = mySourceIndex-1; | |
434 | } | |
435 | ||
436 | }else{ | |
437 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; | |
438 | *err = U_BUFFER_OVERFLOW_ERROR; | |
439 | } | |
440 | } | |
441 | ||
442 | } | |
443 | else{ | |
374ca955 | 444 | /* oops.. the code point is unassigned */ |
b75a7d8f A |
445 | /*Handle surrogates */ |
446 | /*check if the char is a First surrogate*/ | |
4388f060 A |
447 | if(U16_IS_SURROGATE(mySourceChar)) { |
448 | if(U16_IS_SURROGATE_LEAD(mySourceChar)) { | |
374ca955 | 449 | args->converter->fromUChar32=mySourceChar; |
b75a7d8f A |
450 | getTrail: |
451 | /*look ahead to find the trail surrogate*/ | |
452 | if(mySourceIndex < mySourceLength) { | |
453 | /* test the following code unit */ | |
454 | UChar trail=(UChar) args->source[mySourceIndex]; | |
4388f060 | 455 | if(U16_IS_TRAIL(trail)) { |
b75a7d8f | 456 | ++mySourceIndex; |
4388f060 | 457 | mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); |
374ca955 | 458 | args->converter->fromUChar32=0x00; |
b75a7d8f A |
459 | /* there are no surrogates in GB2312*/ |
460 | *err = U_INVALID_CHAR_FOUND; | |
b75a7d8f A |
461 | /* exit this condition tree */ |
462 | } else { | |
463 | /* this is an unmatched lead code unit (1st surrogate) */ | |
464 | /* callback(illegal) */ | |
b75a7d8f A |
465 | *err=U_ILLEGAL_CHAR_FOUND; |
466 | } | |
467 | } else { | |
468 | /* no more input */ | |
469 | *err = U_ZERO_ERROR; | |
b75a7d8f A |
470 | } |
471 | } else { | |
472 | /* this is an unmatched trail code unit (2nd surrogate) */ | |
473 | /* callback(illegal) */ | |
b75a7d8f A |
474 | *err=U_ILLEGAL_CHAR_FOUND; |
475 | } | |
374ca955 A |
476 | } else { |
477 | /* callback(unassigned) for a BMP code point */ | |
478 | *err = U_INVALID_CHAR_FOUND; | |
b75a7d8f A |
479 | } |
480 | ||
374ca955 A |
481 | args->converter->fromUChar32=mySourceChar; |
482 | break; | |
b75a7d8f A |
483 | } |
484 | } | |
485 | else{ | |
486 | *err = U_BUFFER_OVERFLOW_ERROR; | |
487 | break; | |
488 | } | |
489 | targetUniChar=missingCharMarker; | |
490 | } | |
b75a7d8f A |
491 | |
492 | args->target += myTargetIndex; | |
493 | args->source += mySourceIndex; | |
494 | myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; | |
495 | } | |
496 | ||
497 | static void | |
498 | _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { | |
499 | UConverter *cnv = args->converter; | |
500 | UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; | |
501 | char *p; | |
502 | char buffer[4]; | |
503 | p = buffer; | |
504 | ||
505 | if( convData->isTargetUCharDBCS){ | |
506 | *p++= UCNV_TILDE; | |
507 | *p++= UCNV_CLOSE_BRACE; | |
508 | convData->isTargetUCharDBCS=FALSE; | |
509 | } | |
73c04bcf | 510 | *p++= (char)cnv->subChars[0]; |
b75a7d8f A |
511 | |
512 | ucnv_cbFromUWriteBytes(args, | |
513 | buffer, (int32_t)(p - buffer), | |
514 | offsetIndex, err); | |
515 | } | |
516 | ||
73c04bcf A |
/*
 * Structure for cloning an HZ converter into a single memory block.
 * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
 * and then ucnv_safeClone() of the sub-converter may additionally align
 * subCnv inside the cloneHZStruct, for which we need the deadSpace after
 * subCnv. This is because UAlignedMemory may be larger than the actually
 * necessary alignment size for the platform.
 * The other cloneHZStruct fields will not be moved around,
 * and are aligned properly with cloneHZStruct's alignment.
 *
 * The member order is significant: _HZ_SafeClone() passes &subCnv with a
 * size of sizeof(UConverter) + sizeof(UAlignedMemory), so deadSpace must
 * directly follow subCnv.
 */
struct cloneHZStruct
{
    UConverter cnv;             /* the cloned HZ converter itself; its address is what _HZ_SafeClone() returns */
    UConverter subCnv;          /* storage for the cloned GBK sub-converter */
    UAlignedMemory deadSpace;   /* slack for re-aligning subCnv, see above */
    UConverterDataHZ mydata;    /* the clone's private HZ state (extraInfo) */
};
534 | ||
535 | ||
536 | static UConverter * | |
537 | _HZ_SafeClone(const UConverter *cnv, | |
538 | void *stackBuffer, | |
539 | int32_t *pBufferSize, | |
540 | UErrorCode *status) | |
541 | { | |
374ca955 A |
542 | struct cloneHZStruct * localClone; |
543 | int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); | |
b75a7d8f A |
544 | |
545 | if (U_FAILURE(*status)){ | |
546 | return 0; | |
547 | } | |
548 | ||
549 | if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ | |
550 | *pBufferSize = bufferSizeNeeded; | |
551 | return 0; | |
552 | } | |
553 | ||
374ca955 | 554 | localClone = (struct cloneHZStruct *)stackBuffer; |
73c04bcf | 555 | /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
b75a7d8f A |
556 | |
557 | uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); | |
558 | localClone->cnv.extraInfo = &localClone->mydata; | |
559 | localClone->cnv.isExtraLocal = TRUE; | |
560 | ||
561 | /* deep-clone the sub-converter */ | |
73c04bcf | 562 | size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ |
b75a7d8f A |
563 | ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = |
564 | ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); | |
565 | ||
566 | return &localClone->cnv; | |
567 | } | |
568 | ||
569 | static void | |
570 | _HZ_GetUnicodeSet(const UConverter *cnv, | |
73c04bcf | 571 | const USetAdder *sa, |
b75a7d8f A |
572 | UConverterUnicodeSet which, |
573 | UErrorCode *pErrorCode) { | |
46f4442e A |
574 | /* HZ converts all of ASCII */ |
575 | sa->addRange(sa->set, 0, 0x7f); | |
b75a7d8f A |
576 | |
577 | /* add all of the code points that the sub-converter handles */ | |
46f4442e A |
578 | ucnv_MBCSGetFilteredUnicodeSetForUnicode( |
579 | ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, | |
580 | sa, which, UCNV_SET_FILTER_HZ, | |
581 | pErrorCode); | |
b75a7d8f A |
582 | } |
583 | ||
/*
 * Function table of the HZ converter. The initializers are positional;
 * the slot meanings come from the UConverterImpl declaration, which is not
 * part of this file (presumably ucnv_cnv.h -- confirm there before
 * reordering). The same toUnicode and fromUnicode functions are registered
 * twice each because they handle offsets themselves.
 */
static const UConverterImpl _HZImpl={

    UCNV_HZ,

    NULL,
    NULL,

    _HZOpen,
    _HZClose,
    _HZReset,

    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    NULL,

    NULL,
    NULL,
    _HZ_WriteSub,
    _HZ_SafeClone,
    _HZ_GetUnicodeSet
};
607 | ||
/*
 * Static description of the HZ converter. The initializers are positional;
 * the field meanings come from the UConverterStaticData declaration, which
 * is not part of this file (presumably ucnv_bld.h -- confirm there).
 * NOTE(review): { 0x1a, 0, 0, 0 } followed by 1 looks like the substitution
 * bytes and their length, matching the single subChars[0] byte that
 * _HZ_WriteSub() emits -- confirm against the struct declaration.
 */
static const UConverterStaticData _HZStaticData={
    sizeof(UConverterStaticData),
    "HZ",
    0,
    UCNV_IBM,
    UCNV_HZ,
    1,
    4,
    { 0x1a, 0, 0, 0 },
    1,
    FALSE,
    FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
2ca993e8 A |
625 | |
/*
 * The one non-static definition in this file: shared converter data for HZ,
 * built from the static description and the function table above.
 */
const UConverterSharedData _HZData=
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl);
b75a7d8f | 628 | |
b331163b | 629 | #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */ |